{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import glob\n",
    "import librosa\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "from multiprocessing import Pool\n",
    "from matplotlib import pylab as plt\n",
    "from collections import Counter\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "DATA_PATH = \"../../../Data/LJSpeech-1.1/wavs/\"\n",
    "META_PATH = \"../../../Data/LJSpeech-1.1/metadata.csv\"\n",
    "NUM_PROC = 8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " > Number of audio files: 13100\n"
     ]
    }
   ],
   "source": [
    "file_names = glob.glob(os.path.join(DATA_PATH, \"*.wav\"))\n",
    "print(\" > Number of audio files: {}\".format(len(file_names)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "meta_f = open(META_PATH, 'r', encoding='utf8')\n",
    "meta = [m.split(\"|\") for m in meta_f.readlines()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f899c42f6f514ab9bf3834e5facef6a3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=13100), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "def load_item(item):\n",
    "    file_name = item[0]\n",
    "    text = item[2]\n",
    "    audio = librosa.load(os.path.join(DATA_PATH, file_name+'.wav'))\n",
    "    sr = audio[1]\n",
    "    audio = audio[0]\n",
    "    audio_len = len(audio) / sr\n",
    "    text_len = len(text)\n",
    "    return text, text_len, audio, audio_len\n",
    "\n",
    "# This will take a while depending on size of dataset\n",
    "if NUM_PROC == 1:\n",
    "    data = []\n",
    "    for m in tqdm(meta):\n",
    "        data += [load_item(m)]\n",
    "else:\n",
    "    with Pool(8) as p:\n",
    "        data = list(tqdm(p.imap(load_item, meta), total=len(meta)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e42aca59abe14f8bb32b5d5f19af1c67",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=13100), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      " > Number of words: 22943\n"
     ]
    }
   ],
   "source": [
    "# count words in the dataset\n",
    "w_count = Counter()\n",
    "for item in tqdm(data):\n",
    "    text = item[0].lower()\n",
    "    for word in text.split():\n",
    "        w_count[word] += 1\n",
    "print(\" > Number of words: {}\".format(len(w_count)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "647a2e1810324971aacb971acff91fb3",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(IntProgress(value=0, max=13100), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "text_vs_durs = {}  # text length vs audio duration\n",
    "text_len_counter = Counter()  # number of sentences with the keyed length\n",
    "for item in tqdm(data):\n",
    "    text = item[0].lower()\n",
    "    text_len = len(text)\n",
    "    text_len_counter[text_len] += 1\n",
    "    audio_len = item[-1]\n",
    "    try:\n",
    "        text_vs_durs[text_len] += [audio_len]\n",
    "    except:\n",
    "        text_vs_durs[text_len] = [audio_len]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# text_len vs avg_audio_len, median_audio_len, std_audio_len\n",
    "text_vs_avg = {}\n",
    "text_vs_median = {}\n",
    "text_vs_std = {}\n",
    "for key, durs in text_vs_durs.items():\n",
    "    text_vs_avg[key] = np.mean(durs)\n",
    "    text_vs_median[key] = np.median(durs)\n",
    "    text_vs_std[key] = np.std(durs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Plot Dataset Statistics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f2428497a90>"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAHuRJREFUeJzt3X+cXXV95/HXO5ML3AmYCZBSMhiDVnGhrEDn0Y1VWRYoLGCARQlYULD2QX3UbhUxCNVCaGlBI0p3t4+6sbqIoBIjToPdinYr9SF9hJo4iSFCVuRH4CbAgIwgmYUh+e4f59xw5ub+OOf+/vF+Ph7zmHvPPXPu556Z+dzv/Xx/HIUQMDOz3jen0wGYmVlzOKGbmfUJJ3Qzsz7hhG5m1iec0M3M+oQTuplZn3BC7zOSgqTf6MDzniTpiXY/7yCQ9KikU+Pbfyrp75p03Hsk/UEzjpXy+ZoWu5XnhN5iyX/GJhzrFknXN+NYjerUG8egCyH8VQihbUm4XuXe4Hsl9l7mhG5mmSji3NGF/EtpIUlfARYDd0n6laQr4+1LJf2rpClJmyWdFG8/WNITkpbF9w+U9JCk90m6DLgIuDI+1l0pnn9/SZ+RtF3SU5I+LykfP3ZS/FxXSHpa0k5J70/87CGS7pL0vKQfSbpe0g/jx34Q77Y5juWCxM+VPV5JXBdI2lCy7XJJ6+LbZ0r6qaQXJBUkfazCcS6VdK+kz8Xn8mFJvxNvfzyO45KU52OBpG9LmpT0XHz7iMTP3iPpL+Lne0HSdyUdWiGuWsea9alN0kpJtyXuv1fSY5KelfSJkmOX7nu2pK3x679H0r8rF1O87+9KelDSLyX9D0BVjrsk/hQ2N/H6/1LSvcAu4PWS3i/pgfh8PCzpD+N95wH/CCyK/z5+JWlRltjjc/QxST+J471D0gGVXpvFQgj+auEX8ChwauL+KPAscCbRG+rvxvcXxo+fBjwJ/BrwBWBt4mdvAa6v8XwB+I349ueAdcDBwEHAXcAN8WMnAa8Afw7k4nh2AQvix78efw0DRwOPAz8s9zxpjlcS4zDwAvDGxLYfARfGt3cC74hvLwBOqPBaL42f8/3AEHA9sB34G2D/+Fy+AByY4nwcArwrju0g4BvAeOK57gF+DrwJyMf3b6wQV61jlf5NrARui28fDfwKODF+DZ+NX+OpZfZ9E/Ai0d9QDrgSeAjYr0xMh8bn4t3xvpfHx/2D0uPG95fEv+O5ide/HTgGmBsf4yzgDURvDP8x/n2fkPh7eKIkhtSxx+fo34BF8e/rAeCDnf5/7vavjgfQ719l/nk/DnylZJ+7gUsS9/87sAUoAIcktt9CyoQe/5O9CLwh8dhbgUfi2ycB08V/2Hjb08BSouQ4AxyVeOx6aif0sserEOdtwDXx7TfGyWY4vr8d+EPgNTVe66XAzxL3j43jOiyx7VnguFrno8yxjwOeS9y/B/hk4v4fAd9J+TdQeqzSv4lkorsG+HrisXnAy5RP6H8GrEnsOyf+mzmpTAzvA9Yn7gt4gmwJ/c9rvM5x4MOJv4dqCb1q7PE5ujjx+KeBzzf7/7Pfvlxyab/XAefHHzOnJE0BbwcOT+yzGvhN4JYQwrN1Ps9CohbixsTzfCfeXvRsCOGVxP1dwIHxPnOJWuVFyduVVDpeOV8F3hPf/j2iFuyu+P67iFr4j0n6F0lvrfKcTyVuTwOEEEq3FV9TxfMhaVjS/4xLHc8DPwBGJA0ljvVkmteW8liVLCJxrkMILxK9KVXa97HEvnvinx1NcdxAut9p0qz9JZ0hab2kX8Tn80yiTwJppIk91fm2Vzmht17pcpaPE7XQRxJf80IINwLE//SrgVuBP9LskSRZlsZ8hiiZHZN4nvkhhDT/FJNEH8ePSGx7bYbnTuN7wEJJxxEl9q8WHwgh/CiEcA5R2WkcWNOE56t1Pq4AjgL+QwjhNUQlD0jUmTOodawXid5cin49cXsniXMtaZiohFPODqIGQnFfxT9bKLNv6XHF7N9ptZiK9v79Sdof+CbwGaJPRCPA/+bV11jrbzVL7JaS
E3rrPQW8PnH/NmCZpNMlDUk6QFEHZTF5/inRP8PvA6uAWxMtu9JjVRS3eL4AfE7SrwFIGpV0eoqf3Q3cCayMW5tvJvrIXu11ZRJCmCGqLa8iqpF+L45xP0kXSZof7/M8sKfe50k8X63zcRBRwp+SdDBwbQNPV+tYm4ALJeUkjRHVtYvWAu+U9HZJ+xH1SVT6P10DnCXpFEk5ojeSl4B/LbPvPwDHSDov7uj8E2Yn7U3AiZIWS5oPXF3jNe5HVOOfBF6RdAZRn0XRU8Ah8bEajd1SckJvvRuAT8Yf8z8WQngcOIcocU8StdhXAHMk/RbwUeB9cVL9FFFyvyo+1heBo+Njjad47o8TdTStjz/6/xNRyzGNPwbmE33s/QrwNaJ/uKKVwJfjWJanPGaprwKnAt8oKdW8F3g0jvmDRKN7mqHa+biZqLPzGWA9UTmmXrWO9WdEnYnPAdcx+9PJVuBD8bad8T5lJ2yFELYBFxP1uTwDLAOWhRBeLrPvM8D5wI1EJZw3AvcmHv8ecAfwE2Aj8O1qLzCE8ALRm8KaOMbfI+pwLj7+INHfzMPx38iiemO39BR3OJhVJelTwK+HEC6pubOZdYRb6FaWpDdL+veK/DbwAeBbnY7LzCqb2+kArGsdRPSReRFRPfQm4O87GpGZVeWSi5lZn3DJxcysT7S15HLooYeGJUuWtPMpzcx63saNG58JISystV9bE/qSJUvYsGFD7R3NzGwvSY/V3sslFzOzvuGEbmbWJ5zQzcz6hBO6mVmfcEI3M+sTTuhmZn3CU//NzFpkfKLAqru3sWNqmkUjeVacfhTnHl/u+iPN4YRuZtaASkl7fKLA1XduYXpmNwCFqWmuvnMLQMuSes2ELulLwDuBp0MIvxlvO5ho7eQlRNf+Wx5CeK4lEZqZdVgxaRemphmS2B0CoyN5/tObF/LNjYWySXvV3dv2bi+antnNqru3tSyh11ycS9KJRFchvzWR0D8N/CKEcKOkq4iu7P7xWk82NjYWPFPUzDqpVhmk9PHSpJ0kyl9rb3Qkz46p6bKPCXjkxrMyxSxpYwhhrNZ+NVvoIYQfSFpSsvkcoqt6A3yZ6IrgNRO6mVknVSqDbHjsF3z/wUkKU9OzknRhaprb12+veIHUStuLbwaFqel9Hls0km/0ZVRU7yiXw0IIO+PbTwKHVdpR0mWSNkjaMDk5WefTmZk1rlIZ5Pb12/cm39IkXc8C48WWfz43NGt7PjfEitPTXgUyu4Y7RUMIQVLF1xxCWE10FXvGxsa8+LqZNV2lGndpOWVHmRYz1Je0q3nxpVe4/I5NzM/nOCA3h6ldM109yuUpSYeHEHZKOhx4uplBmZmlVVpG2R33Cxamprn8jk185I5Ne5N88XurTU3P7P2ezw3xuQuOa2kiL6o3oa8DLiG6gvgl+NJkZtYmyU7L+fkcz/+/GfZUyNHFzcUk3kgyL9bWs745tHpkS1KaYYtfI+oAPVTSE8C1RIl8jaQPAI8By1sZpJkZ7NsaL7aE2yEQjV6596qTy8ZSTaVST7OlGeXyngoPndLkWMzM9pGsj3daMjEXW9zJIY4vvvRK2TeZVo5sSfJMUTPrGlnGgLdKtVJKaWI+9/jRfcawl7baWz2yJckJ3cxaLs2aJuXGiN+2fnvbY90TAjdfcFxdiblcq73VI1uSas4UbSbPFDUbPNVqzRKEUHnGZVa5OWLe/nMbqq0X6+TtXlirmqbNFDUzyyqZDOdUKWEUNzerWXngAXO5dtkxZVvX7/qtUb7/4GTVck6yFV5aTsmqE28ITuhm1hTJzstki7sd476LpnbNZCp7jL3u4LqSbpr1YNq90iK45GJmTZBlCF8zVOq4TA4rbJVKHZ83nHfs3mT9thv/ueyonHrjS1ty8RWLzKwh4xMFrlizuS3JPJ8b4uYLjuOm5W9p+zopRdWWxS2qNO681ePRXXIxs7qMTxRYuW5rSyb3DM0RB8Wdm9XWZulEp2WaZN2JlRbBCd3M
MqhUJ69XuVEuC4ZzXLvsmJrJudFOy3qlSdYrTj+qI+PRndDNrKbxiQLX3bWV53a92hqvN5m3o87dSmmSdafGozuhm1lVzezwbOesyVZJm6w78QnCCd3MKip2eGYZepgbEvP2m8svp2eYn88h0bb1wNulU+WeWpzQzWyWRhbDSlv/ttZwQjcbQOUmxgD71MmzmrjmtGaFaHVwQjcbMOVmMa5YuxkCzFS6UkQKo21aItYqc0I3GzDlJsbM7E6fyOcomqmZTP790NnZD5zQzQZMI7MVBXx2+XFA55aItcqc0M0GyPhEoerqh9UIuGjp4r2J2wm8+zihmw2IYu08SzIvzuQsN+3euo8TutmAKFc7r8TDD3uTE7pZn6m0VneW2vnwfnOdzHuQE7pZH/nk+BZuX7997zoryQsrVFpUqpxWL/NqreGEbtYnxicKs5J50fTMbi5fs4ks/aCtXubVWsMXuDDrA8U1Vyrl7HLJfMFwjouXLu7YhSKs+ZzQzXpcPaNXIKqTX3/usdxw3rGMjuQR0WiW5KXUrLe45GLW47KMXkkq1sm7deVAy84J3awHNbIiYpHr5P3HCd2sxzTjghO5IblO3odcQzfrMSvXba2ZzHNDIjdHZR9bMJxj1bvf4jJLH3IL3axHjE8UWLluK1PTtdcrX/Xut0TfvYDWQHFCN+sBWcosoyN5L6A1oJzQzbpUPR2fHkM+2JzQzbpQ6RT+NLwiojmhm3WZSlP4K8nnhjwZyIAGR7lIulzSVkn3S/qapAOaFZjZIKo1hb/UguGck7ntVXcLXdIo8CfA0SGEaUlrgAuBW5oUm9lAyTKFf0jipuUeemizNVpymQvkJc0Aw8COxkMyGzzFlnmaZO4Si1VSd8klhFAAPgNsB3YCvwwhfLdZgZkNiiwtcy+eZdU0UnJZAJwDHAlMAd+QdHEI4baS/S4DLgNYvHhxA6Ga9adai2u5vGJpNVJyORV4JIQwCSDpTuB3gFkJPYSwGlgNMDY2lv1S42Z9Jjm+vHgR5kpcXrEsGkno24GlkoaBaeAUYENTojLrU6UzPqsl8yHJydwyaaSGfh+wFvgxsCU+1uomxWXWl9KuXZ7PDbnMYpk1NMolhHAtcG2TYjHrS8USy46p6dTjy90yt3p4pqhZC9WzdnlycS2zLLweulkLXXdX7bXLk3JzfOEJq58TulmLjE8UeG5X7bXLi0byOVad77q51c8lF7MWKM78rEXAIzee1fqAbCC4hW7WZFlmfvpCzdZMbqGbNYEvRmHdwAndrEFZRrIICPhiFNYaTuhmDVq5Lt1IFq/JYq3mGrpZA8YnCkxN1x7J4pmf1g5O6GYNWHX3tpr7eE0WaxcndLMG7KjRCeqWubWTa+hmGYxPFLjurq2pJgy549PazQndLKXxiQIr1m5mZnf18eVew9w6xQndrIrkSolzpJqThVwvt05yQjeroHR8eZqZn3tCcDK3jnGnqFkFaS9GkeSp/NZJTuhmZYxPFDJN4wdP5bfOc8nFrESx1JKGp/JbN3FCN4tlXWBrdCTPvVed3OKozNJzQjejvkvF1ZpUZNZurqGb4Q5Q6w9uodvAqmcN8yJ3gFo3ckK3gZS1xDKcm8OCefuzY2qaRe4AtS7lhG4DKe0a5hC1xv/Ksz+tBzih28BJu4Y5eDii9RYndBs4adYw95BE60Ue5WIDJc0MUHd4Wq9yC90GRpoZoC6xWC9zQreBUW2sudcwt37gkosNhFqlFidz6wduoVtfG58osHLd1qqjWkZH8k7m1hec0K1vfXJ8C7ev3061y1K4A9T6iUsu1pfGJwo1kzm41GL9xS106xv1LH/rZG79xAnd+kLWtVlcarF+1FDJRdKIpLWSHpT0gKS3NiswsyyyLH+7YDjnUov1pUZb6H8NfCeE8G5J+wHDTYjJrKZkeUWCUKtYTnS5uIuWLub6c49teXxmnVB3Qpc0HzgRuBQghPAy8HJzwjIrb3yiwHV3beW5Xa8OQ0yTzIck
blr+FrfKra81UnI5EpgE/pekCUl/J2lek+Iy20exTp5M5mnkc0NO5jYQGknoc4ETgL8NIRwPvAhcVbqTpMskbZC0YXJysoGns0E2PlHgijWbM18mbnQk73q5DYxGauhPAE+EEO6L76+lTEIPIawGVgOMjY2l+HBsNluxZb47TW0l5uVvbRDV3UIPITwJPC6pOPbrFOCnTYnKLCHrBZxzc+QhiTaQGh3l8l+B2+MRLg8D7288JLPZak0UEuydETqSz7Hy7GNcYrGB1FBCDyFsAsaaFIvZ3uGIO6amGRnO8VKVlrlHrpjN5pmi1hXKrYpYbTSLwMncrIQTunVc1mn7EJVYnMzNZvNqi9ZxWTs9IRrFYmazOaFbx+1IuTpikRfWMivPCd06blGG1vZI3gtrmVXiGrp11PhEgV0vv1JzPw9HNKvNCd06otwiW0X53BwOyA0xtWuGRSN5Vpx+lBO5WQpO6NZ2ta71efC8/T1t36wOrqFbW6W51mfWTlIzizihW9sUV0ystcRWlk5SM3uVSy7WMsmrCs0R7EmxWKKHJJrVzwndWqK0Tp4mmXski1ljnNCt6dLUyZN8rU+z5nBCt6Zbdfe21MncKyaaNY87Ra3p0o5S8bU+zZrLCd2aLs0oFU/hN2s+l1ysKZIjWqTK+4165qdZyzihW8NK1zMvvZbzguEc1y7z6BWzVnNCt7pVW48laXi/uU7mZm3ghG51GZ8osGLtZmZ21x7P4qn8Zu3hTlGry6q7t6VK5uCp/Gbt4ha6pZbs+EzLU/nN2scJ3VKp50LOHtFi1l5O6JZK2gs55+aIVed7spBZJ7iGbjWNTxRSlVlG8jknc7MOcgvdypo1USjF/qMjeV9lyKzDnNBtH/tMFKqxvzs+zbqDE7rtI229HNzxadZNnNBtH2kmArnEYtZ9nNANeLVmvmNqmjkSu0sXZCnhEotZ93FCt31q5rWS+YLhnEssZl3IwxaN6+7amrpmns8Nce2yY1ockZnVwy30AZZ2tcQhiT0hsMgdoGZdzQl9QGWZyr8nBB658aw2RGVmjXBCH0DjEwWuWLO5Zq28yKslmvWGhmvokoYkTUj6djMCstYqtszTJnNPGjLrHc1ooX8YeAB4TROOZS2WZtKQiGaHetKQWW9pKKFLOgI4C/hL4KNNiciaKrkmy1CK8eUj+Rwrz/b1P816UaMt9JuBK4GDmhCLNVG5ESzVkvmQxE3LvVKiWS+ru4Yu6Z3A0yGEjTX2u0zSBkkbJicn6306y6BYJ681HLFI4GRu1gca6RR9G3C2pEeBrwMnS7qtdKcQwuoQwlgIYWzhwoUNPJ2llWVxLYjq5U7mZr2v7oQeQrg6hHBECGEJcCHwzyGEi5sWmdUtzeJaSaMelmjWFzwOvY8UO0DTDUiMeFiiWf9oSkIPIdwD3NOMY1l90sz8LA5HLI528bBEs/7iFnqfqFU3XzCcY+Ka09oYkZm1mxN6D0uOMa9lKuWIFzPrXU7oPeqT41u4ff321PVyr8di1v+8HnoPGp8oZErm7vg0GwxuofegLCNZ3PFpNjic0HvM+EQhVc3cF3E2GzxO6D0i7dWFwCUWs0HlhN4DslxdyCUWs8HlhN6FisMRd0xNMzKcY2p6hlrXo7j5guOcxM0GnBN6lyltjacpsYyO5J3MzczDFrtN1pUSXS83syIn9C6TZaXEkXyOG8471q1zMwNccukKWabwg68uZGblOaF3WJYRLOCrC5lZZU7oHZK1VQ5RMr9o6WInczMrywm9A+pplS/y+HIzq8EJvQOyjGTxFH4zS8sJvU2Sk4W8SqKZtYITehtkXbscPIXfzLJzQm+h8YkCK9dtZWo6/dWC8rkhjy03s7o4obdIlo7P4sWb3So3s0Y4obdIlo7PRe74NLMmcEJvsizrlhdlme5vZlaJE3oTjU8UWLF2MzO7s3R/+gLOZtYcTuhNMj5R4Io1m9lda+HyEh6aaGbN4oTeBMUO0FrJ/OYLjgPYOx7d
sz/NrJmc0JsgTQdo8iIUTuBm1gpeD70JanVq5ubIZRUzazkn9AaNTxSYI1V8fCSfY9X5Xu7WzFrPJZc61ZoF6hmfZtZuTugZJNcwL87uLGdIcjI3s7ZzQi+RXBUxOQqldCp/tfEse0JwMjeztnNCTyhN2oWpaa6+cwuQfSq/mVm7OaEnlEva0zO7M03l90QhM+sUJ/SESsMP0ybzBcM5rl12jMstZtYRdSd0Sa8FbgUOIyoprw4h/HWzAuuERSP5TBdtThrJ55i45rQmR2Rmll4j49BfAa4IIRwNLAU+JOno5oTVGStOP4p8bijzz+VzQ6w8+5gWRGRmll7dLfQQwk5gZ3z7BUkPAKPAT5sUW1sVR7ek7fgcktgTgtdjMbOu0ZQauqQlwPHAfc04XrtluboQeNKQmXWnhqf+SzoQ+CbwkRDC82Uev0zSBkkbJicnG326pisue5s2mQNO5mbWlRpK6JJyRMn89hDCneX2CSGsDiGMhRDGFi5c2MjTNV3aZW+Tkqsmmpl1k0ZGuQj4IvBACOGzzQup9ZJT+LPwGHMz62aNtNDfBrwXOFnSpvjrzCbF1TLFVnmtZJ7PDXHx0sWMjuQRUcvcpRYz62aNjHL5IVB53dgulPYycV5cy8x6Ud/PFB2fKGSaug9w03KvX25mvaevE/r4RIEVazczszt9p+dIPudkbmY9qa+vWLTq7m2ZkrlnfJpZL+uZFnqldcqr7Z9lFMuoZ3yaWY/riYRebZ3y4sUnisl+fj7HzO49vPhy7YlCnvFpZv2kJxJ6pXXKV929DWBWsq90jc9SXurWzPpNTyT0SuuU75iazrSgFkSdnivPdiI3s/7T9Ql9fKLAHKns2PEAmevk9151chOjMzPrHl09yqWetVYq8bR9M+t3XZ3Qs5ZTKhnJ59z5aWZ9r6tLLpVq52m549PMBklXJ/R6r/HpMeVmNoi6uuRSzzU+ix2fTuZmNmi6uoVeTMrFSUOVRrsUuePTzAZZVyd0iJJ6MbEfedU/VNzPZRYzG3Rdn9CTKtXUPb7czKzLa+ilytXUXWYxM4v0VAu9tKaeZtVFM7NB0VMJHWbX1M3M7FU9VXIxM7PKnNDNzPqEE7qZWZ9wQjcz6xNO6GZmfUKhCWuNp34yaRJ4rG1PWNuhwDOdDiKlXooVeiveXooVeiveXooVujfe14UQFtbaqa0JvdtI2hBCGOt0HGn0UqzQW/H2UqzQW/H2UqzQe/GWcsnFzKxPOKGbmfWJQU/oqzsdQAa9FCv0Vry9FCv0Vry9FCv0XryzDHQN3cysnwx6C93MrG84oZuZ9YmBSOiSXivp+5J+KmmrpA/H21dKKkjaFH+d2elYiyQ9KmlLHNeGeNvBkr4n6Wfx9wVdEOdRifO3SdLzkj7STedW0pckPS3p/sS2sudSkf8m6SFJP5F0QhfEukrSg3E835I0Em9fImk6cY4/385Yq8Rb8Xcv6er43G6TdHoXxHpHIs5HJW2Kt3f83NYlhND3X8DhwAnx7YOA/wscDawEPtbp+CrE/ChwaMm2TwNXxbevAj7V6ThL4hsCngRe103nFjgROAG4v9a5BM4E/hEQsBS4rwtiPQ2YG9/+VCLWJcn9uujclv3dx/9zm4H9gSOBnwNDnYy15PGbgGu65dzW8zUQLfQQws4Qwo/j2y8ADwC9uKj6OcCX49tfBs7tYCzlnAL8PITQTbOBCSH8APhFyeZK5/Ic4NYQWQ+MSDq8PZGWjzWE8N0Qwivx3fXAEe2Kp5YK57aSc4CvhxBeCiE8AjwE/HbLgitRLVZJApYDX2tXPK0wEAk9SdIS4HjgvnjTH8cfZb/UDSWMhAB8V9JGSZfF2w4LIeyMbz8JHNaZ0Cq6kNn/EN16bqHyuRwFHk/s9wTd9eb/+0SfIIqOlDQh6V8kvaNTQZVR7nffzef2HcBTIYSfJbZ167mtaKASuqQDgW8CHwkhPA/8LfAG4DhgJ9FHrm7x9hDCCcAZwIcknZh8MESf
C7tmzKmk/YCzgW/Em7r53M7SbeeyEkmfAF4Bbo837QQWhxCOBz4KfFXSazoVX0LP/O4T3sPsxki3ntuqBiahS8oRJfPbQwh3AoQQngoh7A4h7AG+QBs//tUSQijE358GvkUU21PFj//x96c7F+E+zgB+HEJ4Crr73MYqncsC8NrEfkfE2zpK0qXAO4GL4jcg4tLFs/HtjUQ16Td1LMhYld99t57bucB5wB3Fbd16bmsZiIQe18e+CDwQQvhsYnuyNvpfgPtLf7YTJM2TdFDxNlGn2P3AOuCSeLdLgL/vTIRlzWrhdOu5Tah0LtcB74tHuywFfpkozXSEpP8MXAmcHULYldi+UNJQfPv1wBuBhzsT5auq/O7XARdK2l/SkUTx/lu74yvjVODBEMITxQ3dem5r6nSvbDu+gLcTfaT+CbAp/joT+AqwJd6+Dji807HG8b6eaDTAZmAr8Il4+yHA/wF+BvwTcHCnY43jmgc8C8xPbOuac0v0RrMTmCGq236g0rkkGt3yN0Qtsi3AWBfE+hBR7bn4t/v5eN93xX8fm4AfA8u65NxW/N0Dn4jP7TbgjE7HGm+/Bfhgyb4dP7f1fHnqv5lZnxiIkouZ2SBwQjcz6xNO6GZmfcIJ3cysTzihm5n1CSd0M7M+4YRuZtYn/j9nZpV7WNt7xQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f23b8395470>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.title(\"text length vs mean audio duration\")\n",
    "plt.scatter(list(text_vs_avg.keys()), list(text_vs_avg.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f24206c7358>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X2UXHWd5/H3J50CqwOTDpJxSGMMPuHCOAL2mY2jsgwyMoIB1gfAAQUfDuMZZ8YHRIOihFlmjUaU2d056+LqooIKIvYEZxR1kfHInDAmdjBGyIoCgSKEgDQgaaWT/u0f9954u1IP99bzw+d1Tp+uunXr3m/drv7Wr7739/tdhRAwM7P+t6DbAZiZWWs4oZuZDQgndDOzAeGEbmY2IJzQzcwGhBO6mdmAcELvY5KCpOd3Yb8nSHqg0/ttp/SxlPQZSR/pdkz1SLpX0knx7Q9J+t8t2u6tkt7Rim1l3F/LYh92C7sdwCCRdC/wjhDC91qwrauBB0IIlzS7rRbEEoAXhBDu7nYsnRBCeGe3Y8grhPBfux1DFpJOAK4JIRyeLOuX2PuBW+hm1hKKOKd0kQ9+i0j6ErAcuEnSryV9IF6+UtK/SZqWdEfcQkHSIZIekLQqvn+QpLslvUXSBcA5wAfibd2UYf8HSvqkpO2SdsZlg2L82Anxvi6U9LCkHZLemnruMyXdJOkJST+SdLmkH8aP/SBe7Y44lrNSz6u4vbK4zpK0sWzZeyWtj2+fIulnkp6UVJL0/irbOV/SbZI+HR/LX0r6k3j5/XEc52U5HvHjF8VxPyjpbWX7ulrS5fHtJZK+KWmXpMfi24en1r1V0n+JY3tS0nckHVrlNdTb1r4SSnx/jaRrUvffLOk+SY9K+nDZtsvXPU3S1vhY3SrpP1SKKV73zyTdJelxSf8DUI3trlBUnlqYev1/L+k2YDfwXElvlXRnfDx+Kekv43UXAd8ClsXvpV9LWpYn9vgYvV/ST+J4r5P0jGqvbeiEEPzToh/gXuCk1P1x4FHgFKIPzz+L7y+NH3818BDw+8BngRtSz70auLzO/gLw/Pj2p4H1wCHAwcBNwMfix04A9gB/BxTieHYDS+LHvxr/jAJHAfcDP6y0nyzbK4txFHiSqGSTLPsRcHZ8ewfwyvj2EuC4Kq/1/HifbwVGgMuB7cA/AgfGx/JJ4KAMx+PPgZ3AHwKLgC+XHct9xx54JvD6+HUcDHwNmEzFdSvwC+CFQDG+v7bKa6i3rfL3zxqi8gTx3+XXwPHx6/1UfDxOqrDuC4GniN5vBeADwN3AARViOjQ+bm+I131vvN13lG83vr8iPlYLU69/O3A0UQm3AJwKPI/og+E/xe+N41LvnQfKYsgce3yM/h1YFv9t7wTe2e3//V756XoAg/RT4R/yg8CXyta5GTgvdf+/A1uAEvDM1PJ9SaXG/gLw/Pgf5yngeanHXgbcE98+AZhJ/gnjZQ8DK4mS4yxwZOqxy6mf0Ctur0qc1wAfjW+/IE4go/H97cBfAr9X57WeD/w8df/FcVzPSi17FDgmw/H4PKmkGyeRigm9QhzHAI+l7t8KXJK6/1fAtzO+X8q3Vf7+WcPvEt1Hga+mHlsEPE3lhP4R4PrUugvi99cJFWJ4C7AhdV/AA+RL6H9X53VOAu9OvXdqJfSascfH6NzU458APtPs/+6g/Ljk0l7PAd4Yf3WcljQNvAI4LLXOVUQtxatDCI82uJ+lRK2+Tan9fDtenng0hLAndX83cFC8zkKiVnkifbuaatur5MvAm+Lbf0HUKt0d3389UQv/Pkn/KullNfa5M3V7BiCEUL4seU21jscy5r/G+6rtUNKopP8VlzqeAH4AjEkaSa32UOp21eOQcVvVzIs5hPAU0QdYtXXvS607Fz93PMN2A9n+/mnz1pf0GkkbJP0qPvanEH0TyCJL7JmO9zByQm+t8qkr7ydqoY+lfhaFENYCxP/IVwFfBP5K87sg5pkG8xGiZHZ0aj+LQwhZ3ui7iL5iH55a9uwc+87iu8BSSccQJfYvJw+EEH4UQjidqOw0CVzf
gv3VOx47mP8al9fY1oXAkcB/DCH8HlHJA1J15hzqbespog+ixB+kbs+LWdIoUQmnkgeJGhPJuoqfW6qwbvl2xfxjUyumxL73qqQDga8DnyT69jQG/Au/e4313td5YrcyTuittRN4bur+NcAqSSdLGpH0DEUnKJPk+SGiN/jbgHXAF1OttfJtVRW3Yj4LfFrS7wNIGpd0cobn7gVuBNbELcgXEX0Nr/W6cgkhzBLVi9cR1T2/G8d4gKRzJC2O13kCmGt0P6n91Tse1wPnSzoqToyX1tjcwUQfDtOSDqmzbj31trUZOFtSQdIEUV07cQPwWkmvkHQA0fmLav+/1wOnSnqVpALRB8lvgX+rsO4/A0dLel18ovNvmZ+0NwPHS1ouaTFwcZ3XeABRjX8XsEfSa4jObyR2As+Mt9Vs7FbGCb21PgZcEn/Nf38I4X7gdKLEvYuoxX4RsEDSS4H3AW+Jk+rHiZL76nhbnwOOirc1mWHfHyQ6ebQh/jr/PaLWYBZ/DSwm+ir7JeArRP9EiTXAF+JYzsy4zXJfBk4CvlZWqnkzcG8c8zuJeve0QtXjEUL4FnAlcEu8zi01tnMl0cnOR4ANRKWbRtXb1keITiY+BlzG/G8yW4F3xct2xOtUHNwVQtgGnEt0fuYRYBWwKoTwdIV1HwHeCKwlKuG8ALgt9fh3geuAnwCbgG/WeoEhhCeJPhSuj2P8C6KT08njdxG9v34Zv5+WNRq77U/xiQWzfSR9HPiDEMJ5dVc2s57hFroh6UWS/kiRPwbeDnyj23GZWT4e+m8Q1Xa/QtTDYCdwBfBPXY3IzHJzycXMbEC45GJmNiA6WnI59NBDw4oVKzq5SzOzvrdp06ZHQghL663X0YS+YsUKNm7cWH9FMzPbR1LV0cxpLrmYmQ0IJ3QzswHhhG5mNiCc0M3MBoQTupnZgHBCNzMbEB76b2bWJpNTJdbdvI0Hp2dYNlbkopOP5IxjK11npDWc0M3M6qiVmKs9NjlV4uIbtzAzuxeA0vQMF9+4BaBtSb3uXC6SPg+8Fng4hPCH8bJDiOZIXkF0jb8zQwiP1dvZxMRE8MAiM+umvK3m8sScWDJa4NQ/OoyvbyrNe6xYGOFjr3sx627eRml6Zr/tjY8VuW31iblilrQphDBRd70MCf14oquNfzGV0D8B/CqEsFbSaqKrvX+w3s6c0M2smyol5yQBV2tx7356D4/tnq24PVH5mnrjY0UenJ6p+JiAe9aemivurAm97knREMIPgF+VLT4d+EJ8+wvAGbmiMzNrg8mpEi9fewtHrP5nXr72Fian5l+KdN3N2/Zrac/M7mXdzdv2Pf/iG7dQipNxaXqmajKH6hdITT4MKqm2vBUaraE/K4SwI779EPCsaitKugC4AGD58lrX4jUza8zkVInLbto6L/lWqlk/WKEEkqz78rW3VCyRNCIp5VT6NnDRyVmvDJlfpvnQJa0AvpkquUzHV/NOHn8shLCk3nZccjGzvOrVvKvVuBNjxQKLDlzIg9MzLJDY24FrQIwVCzw+M8viYgEJpnfPNtXLJWvJpdEW+k5Jh4UQdkg6DHi4we2YmVVVqafIRTfcwZr1W/clzCd+M8tcjRw9PTPL9EzUcm80mQsoFhawe3Yu0/rJ/qZnZikWRvj0Wce0tbtiotGBReuB5ALC5+HLlZlZG1Sqec/uDUzPzBKIEmatZN4qAQiIc1cuZ3ysiIARKdNz0zX6dqvbQpf0FeAE4FBJDwCXAmuB6yW9HbgPOLOdQZpZf8vaVTBZrzQ9w0iHyiNZzczu5ft37drX5bBeqSetWu2+1eom9BDCm6o89KoWx2JmA6jeAJt0Ek93A+ylZJ5IJ+bkAyn9QfXUb/fsK7ektbNnS5pHippZW9XrKphO9r2QwiWo9llSnpjPOHa87gnadvdsSXNCN7OWS5dYqiXp0vQM77luc0v3W22gT611k9LOeI1h+5AtMVdqtbd7/pY0J3Qza6k8teVWC0SjNKv1
Jx+RmAuhbqJtJjGXt9o7yQndzFoiXQvvlqSVfdENdzC7d35bvbBArHvjSzIn22YTc6dnWgQndDNrgXa2ypOSyFhqkM7YaIFf/2YPs6k+i0lJJEma6ZGjY8UCa047umUJNe9gp07MtAhO6GaWQzqRJaMga8110ox6sxLWSqrtLHtkSda1TgQ7oZtZ15Unskrd81ol6wnIbtSqsyTrav3O290f3ZegM7O6JqdKXHj9HS0pqaQHWCa3x4oFlowWEFHLPD2dba/Jkqy7MdMiuIVuZnUkLfNWDPQpn3u8Hy2r0osmnay7MdMiuIVuZnVUKjFk1U8t76wuOvlIioWRecvKk/UZx47zsde9eN+8L5167W6hm1lFleYYz2IQWuG1ZO2j3o0avxO6mc0zOVVizfqtDZ30HO/wyMhu6ebgoVqc0M1sn2b6kwtyX/zYWssJ3WwIVevD3Uy9vFMzClp1TuhmQ6bWwJgs/aSXjBb4zexc12YUtOqc0M2GTLWBMVlmPiwsEJeuOnrfdroxo6BV54RuNmQaHa1YPh+KE3jvcUI3GyKTU6VcV74fkbjizOwzFFp3eWCR2ZBoZMTnXAhO5n3ECd1sSDTSg8U9V/qLE7rZkMhbO3fPlf7jGrrZAKk1R3i1SaUWCJLrRCTX2RyWEZ+DRqEFM6hlNTExETZu3Nix/ZkNk1qjPKO+43uZmZ2bt3zQ510ZFJI2hRAm6q3nFrrZgKhVI680wdaS0QKXrmrdZdms+1xDNxsAk1Ol3BdnHj1goZP5gHFCN+tzSaklr3ZfDs06zwndrM81OqGWuyQOHid0sz7XSEtb4C6JA8gnRc36RLpL4uJiAanyyc60dJfEhIBzVi53/XwAOaGb9YHyLon1riaU7o5Yq2+6DRYndLM+kKdOPiLN61veq5dLs9ZzDd2sD+Spk3tCreHlhG7WB/L0SHHvleHlkotZD5qcKnHZTVv3nfRUxud5Qq3h1lRCl/Re4B1E8/lsAd4aQvhNKwIzG0aTUyXWrN+630nPLDMueUItazihSxoH/hY4KoQwI+l64Gzg6hbFZjZUak2uVW5EYi4E91qxeZotuSwEipJmgVHgweZDMhtOeXqyzIXAPWtPbXNE1m8aPikaQigBnwS2AzuAx0MI32lVYGbDJO/kWj7xaZU0nNAlLQFOB44AlgGLJJ1bYb0LJG2UtHHXrl2NR2o2oPJOruUTn1ZNMyWXk4B7Qgi7ACTdCPwJcE16pRDCVcBVEF3goon9mQ2UZARnlpa5BCH4xKfV1kxC3w6slDQKzACvAnw5IrM6qvVkqeTKs45x8rbMGk7oIYTbJd0A/BjYA0wRt8TNrLI8PVnGx4pO5pZLU71cQgiXApe2KBazgZe1J4vr5NYID/0366Csc7L4ws3WCA/9N2uz9PS1CyT2htp9A1xqsUY5oZu10SWTW7h2w/Z9Q/frJXOXWqwZTuhmbTI5VZqXzCsZi688NL171sP4rWlO6GZtsu7mbTWTebEwwprTjnYCt5bxSVGzNql3AnRmdi/rbt7WoWhsGLiFbtYC6VGfI/GJz5EMJ0DzXInIrB4ndLMmlQ8WSpJ4vWQOnmTLWsslF7Mm5Zn2Ns09WqzV3EI3a1LWaW/HigUWHbiQB6dn3KPF2sIJ3awJk1MlRLZLxD0+M8vmS1/d7pBsiLnkYtaEel0T01wvt3ZzC90sh/Qw/rHRAo/trj8FLrhebp3hhG6WUXlvllrJfMlogdEDXC+3znJCN8soz9S3l67yCFDrPNfQzTLy1LfW69xCN6sgXStPSibLxop1uyh66lvrJid0szLltfLS9AzvuW4zxULtL7Q+8Wnd5pKLWZlqtfKZ2bmqzxkrFlxqsa5zC92sTJaRn8lgonH3YLEe4oRuFpucKnHZTVszrZsk89tWn9jeoMxycEK3oZU+8bm4WOCpp/cwuzfruE9PfWu9xwndhlL5ic/pmWwjPtM8lN96jU+K2lBqdMrbhHu0WC9yQrehlKdcUiyM
cO7K5YyPFRFR7dw9WqwXueRiQ2lxsZCpzDJWLPhCztY3nNBt6ExOlXjq6T1117vyrGOcyK2vOKHbUJmcKnHh9XfUvd6nh/BbP3IN3YZG0rOlXjL3CU/rV26h29DI0rPFIz+tnzmh29Co1bOlWBhxzxXre07oNtCS0aC15mcZkZzMbSA4odvAumRyC9du2F7zIs5umdsgcUK3gTM5VWLN+q11+5m7ZW6DxgndBkbWRJ6YC8HJ3AZKU90WJY1JukHSXZLulPSyVgVmlkfSJTHPJFueXMsGTbMt9H8Avh1CeIOkA4DRFsRkVlf5NT93P70n12Rb7mtug6jhhC5pMXA8cD5ACOFp4OnWhGVWXaVrfubhvuY2qJppoR8B7AL+j6SXAJuAd4cQnmpJZGZVNDL17ZLRApeu8iRbNtiaSegLgeOAvwkh3C7pH4DVwEfSK0m6ALgAYPny5U3szoZZlv7klTiR2zBpJqE/ADwQQrg9vn8DUUKfJ4RwFXAVwMTERPbre5nFykssWXm2RBs2DfdyCSE8BNwvKTmz9CrgZy2JyiylkRKLZ0u0YdRsL5e/Aa6Ne7j8Enhr8yGZzZf3YsyFBXIPFhtKTSX0EMJmYKJFsZjt1x3xopOPZNlYsWbtXIJkRlxfYciGmUeKWs+o1B3xvddtJgCCinOyeC4Ws99xQreeUalWHsp+AywQzAX3Jzcr54RuPWFyqpS5S+KBC90qN6vEl6CzrktKLVnNzO5l3c3b2hiRWX9yQreua6RbYt6eL2bDwAnduq6R5OyZEs3254RuXZc3OXumRLPKfFLUOi7d13xxscDs3rn91km6KY6PFfnTFy3l+3ftmtc33SdEzfbnhG4dVd7XvNIFKTw4yKwxLrlYR2U5AbrowIVO5mYNcEK3jspyAtQ9WMwa45KLtU16DvMRib0h2+zJ7sFi1hgndGuLSya3cO2G7fuG7GdN5u7BYtY4l1ys5SanSvOSeVZjxYKH9Js1wS10a7l1N2/Llcw9yZZZazihW8vlOak5PlbkttUntjEas+Hhkou11ORUiQVSpnVdLzdrLbfQrSUmp0pcdtNWHtu9/0ChtKS3i8ssZq3nhG5NyZLIRySuOPMlTt5mbeaEbg0r75pYzVwITuZmHeAaujUkT9dEDxQy6wwndGtI1q6JPvFp1jkuuVhm6WlvsyRzz5po1llO6JZJ+bS3tTiRm3WHE7pVlG6NLxsr8tRv99RN5gLOWbmcy894cWeCNLN5nNBtP+Wt8VKGkZ/uV27WfT4pavvJchGKtOTEp5O5WXc5odt+8l5gYmZ2L+tu3tamaMwsKyd0208j/cZ9lSGz7nNCt/1cdPKRFAsjuZ7jwUNm3eeTogbs36vl9S8d55oN2zM914OHzHqDE7rtNydLaXqGazZsR4JqV45bMlpgevcsy9y7xaxnOKEPsXozJVZK5u5rbta7nNCHVJ6RnyMScyG4NW7W45zQh1SevuZzIXDP2lPbHJGZNavpXi6SRiRNSfpmKwKy9pucKmUa/ZlwDxaz/tCKbovvBu5swXasA5JSS1buwWLWP5oquUg6HDgV+HvgfS2JyNoi6ZZYr2U+WljAAQtHeHzGPVjM+k2zNfQrgQ8AB7cgFmuDrBdvBrjyrGOcvM36WMMlF0mvBR4OIWyqs94FkjZK2rhr165Gd2cNSMorWZL5+FjRydyszzVTQ385cJqke4GvAidKuqZ8pRDCVSGEiRDCxNKlS5vYneUxOVXiwuvvyNSTReA6udkAaDihhxAuDiEcHkJYAZwN3BJCOLdlkVnDkpb53mrDPMsEcOvcbAB4cq4BlHc+83F3SzQbCC0ZWBRCuBW4tRXbssZl7cmS5m6JZoPDLfQBkZRZaiXzEYlzVy5nfKyIiFrmH3vdi11uMRsQHvo/IOqVWQoLxLo3vsTJ22yAuYU+IOpdMeigZyx0MjcbcG6h97E8NfPpDH3Rzay/OaH3qfKLUtTjCbbMBp9LLn1ocqqUK5m7J4vZ
cHALvc8kI0CzJHOBJ9gyGyJO6H0izyRbEHVJvG31iW2Oysx6iRN6H8hzuThwicVsWDmh94E8Q/nHXWIxG1pO6D0sa5llROKKMz1oyGzYOaH3oMmpEmvWb2V6pn69vFgY8fB9MwOc0HtCMkDowekZFhcLPPX0Hmb31u/HMlYssOa0o53MzQxwQu+68hOeWVrl4MvFmdn+PLCoy/LOXQ6+XJyZVeaE3mV55i6HaNZEd0k0s0qc0LtkcqrEMZd9J9dzxooFT4FrZlW5ht4FeQcKuV5uZlk4oXdIuifLAinzBZxdLzezrJzQO6B8qtusydxD+M0sDyf0Nss71a2AgIfwm1l+TuhtlGeqW4AlowUuXeWBQmbWGCf0NklOfGYpr3guFjNrBSf0Fstznc/EXAhO5mbWNCf0FsrbHTHh632aWSt4YFELNTKM3z1ZzKxVnNBbZHKqVLfMUiyMcO7K5YyPFRFRTxZPfWtmreKSSwskpZZ6nLzNrJ2c0JuUdE2s15vFIz7NrN1ccmlC1q6JrpObWSe4hZ5Dej6WZWNFdj+9p+5JUI/4NLNOcULPqLxLYpYToK6Zm1knOaGXKW+FJ63rPF0SRyQnczPrOCf0lEqt8Itv3MLG+36VeeSnW+Zm1i0+KZpSqRU+M7uXazdsz7wNJ3Mz65aGE7qkZ0v6vqSfSdoq6d2tDKwbHqzSCs86W6K7JppZNzXTQt8DXBhCOApYCbxL0lGtCas7mplTReCuiWbWVQ0n9BDCjhDCj+PbTwJ3An3dPL3o5CMpFkZyP0/AOSuXu3VuZl3VkpOiklYAxwK3t2J73ZD0bsnSk6UwIhYdsJDHZ2bn9YQxM+umphO6pIOArwPvCSE8UeHxC4ALAJYvX97s7toiz7S3IxLr3uCLUZhZ72mql4ukAlEyvzaEcGOldUIIV4UQJkIIE0uXLm1md22RzMWStY+5L0ZhZr2q4Ra6JAGfA+4MIXyqdSF1ziWTW3JdwBl8MQoz613NlFxeDrwZ2CJpc7zsQyGEf2k+rPaanCpx2U1beWz3bM31xPwui55ky8x6WcMJPYTwQ6Kc11ey1suLhRFe/9Jxvn/Xrv2mATAz60VDMfQ/74WbPReLmfWjgUvo5ZNr/emLlvL1TaXMJz0FXHGme7GYWf8ZqIReaXKta3LMw+IBQmbWzwYqoeeZ4rbcWLHAmtOOdjI3s741EAk9b408bURyicXMBkLfJ/Q8ozzLee5yMxskfZ/QGy2z+FqfZjZoej6hV7skXKLaHObVuFVuZoNKIeQZ+N6ciYmJsHHjxszrVyunFAsLeEZhpO5Iz3JLRgtcusonPs2sv0jaFEKYqLdeT7fQq5VTZmbnmJmdq/v8EYm5EDzK08yGQk8n9LzllDSXVsxs2PT0RaIbndlwfKzoZG5mQ6enE3ojl4QbHyty2+oTnczNbOj0dMklScpZproFT29rZsOtp1voECX1qY++mivPOqbmei6zmNmw6/mEnjjj2HHGq9TUXWYxM+ujhA6Va+ous5iZRXq6hl4uaYHXGjlqZjas+iqhQ5TUncDNzPbXVyUXMzOrzgndzGxAOKGbmQ0IJ3QzswHhhG5mNiA6Oh+6pF3AfR3bYX2HAo90O4iM+ilW6K94+ylW6K94+ylW6N14nxNCWFpvpY4m9F4jaWOWSeN7QT/FCv0Vbz/FCv0Vbz/FCv0XbzmXXMzMBoQTupnZgBj2hH5VtwPIoZ9ihf6Kt59ihf6Kt59ihf6Ld56hrqGbmQ2SYW+hm5kNDCd0M7MBMRQJXdKzJX1f0s8kbZX07nj5GkklSZvjn1O6HWtC0r2StsRxbYyXHSLpu5J+Hv9e0gNxHpk6fpslPSHpPb10bCV9XtLDkn6aWlbxWCry3yTdLeknko7rgVjXSborjucbksbi5SskzaSO8Wc6GWuNeKv+7SVdHB/bbZJO7oFYr0vFea+kzfHyrh/bhoQQBv4HOAw4Lr59MPD/gKOANcD7ux1flZjvBQ4tW/YJ
YHV8ezXw8W7HWRbfCPAQ8JxeOrbA8cBxwE/rHUvgFOBbgICVwO09EOurgYXx7Y+nYl2RXq+Hjm3Fv338P3cHcCBwBPALYKSbsZY9fgXw0V45to38DEULPYSwI4Tw4/j2k8CdQD9Oqn468IX49heAM7oYSyWvAn4RQuil0cCEEH4A/KpscbVjeTrwxRDZAIxJOqwzkVaONYTwnRDCnvjuBuDwTsVTT5VjW83pwFdDCL8NIdwD3A38cduCK1MrVkkCzgS+0ql42mEoEnqapBXAscDt8aK/jr/Kfr4XShgpAfiOpE2SLoiXPSuEsCO+/RDwrO6EVtXZzP+H6NVjC9WP5Thwf2q9B+itD/+3EX2DSBwhaUrSv0p6ZbeCqqDS376Xj+0rgZ0hhJ+nlvXqsa1qqBK6pIOArwPvCSE8AfxP4HnAMcAOoq9cveIVIYTjgNcA75J0fPrBEH0v7Jk+p5IOAE4DvhYv6uVjO0+vHctqJH0Y2ANcGy/aASwPIRwLvA/4sqTf61Z8KX3zt095E/MbI716bGsamoQuqUCUzK8NIdwIEELYGULYG0KYAz5LB7/+1RNCKMW/Hwa+QRTbzuTrf/z74e5FuJ/XAD8OIeyE3j62sWrHsgQ8O7Xe4fGyrpJ0PvBa4Jz4A4i4dPFofHsTUU36hV0LMlbjb9+rx3Yh8DrgumRZrx7beoYiocf1sc8Bd4YQPpVanq6N/mfgp+XP7QZJiyQdnNwmOin2U2A9cF682nnAP3UnwormtXB69dimVDuW64G3xL1dVgKPp0ozXSHpz4EPAKeFEHanli+VNBLffi7wAuCX3Ynyd2r87dcDZ0s6UNIRRPH+e6fjq+Ak4K4QwgPJgl49tnV1+6xsJ36AVxB9pf4JsDn+OQX4ErAlXr4eOKzbscbxPpeoN8AdwFbgw/HyZwL/F/g58D3gkG7HGse1CHgUWJxa1jPHluiDZgcwS1S3fXu1Y0nUu+UfiVpkW4CJHoj1bqLac/Le/Uy87uvj98dm4MfAqh45tlX/9sCH42O7DXhNt2ONl18NvLM2FuNCAAAAPElEQVRs3a4f20Z+PPTfzGxADEXJxcxsGDihm5kNCCd0M7MB4YRuZjYgnNDNzAaEE7qZ2YBwQjczGxD/H1uk1sA686L8AAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f23ba4515f8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.title(\"text length vs median audio duration\")\n",
    "plt.scatter(list(text_vs_median.keys()), list(text_vs_median.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f242065e8d0>"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAEICAYAAACktLTqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X2UHOV15/Hv1aiBERAGgpKYAYGSEBQTYmQUzB4lWSAYMF5AgQSB7djZOMtmE/YsGCsrr31AYZ1FjpaY7Fl2syTh+CU4CIw9kQ8k4ASSbNjgaOSRYouAQ3hVQ2wZGNswA5qR7v7R1aKmp6q6qru6u6r69zlnjmaqS93PVPfceuo+93nK3B0REamWJYNugIiI5E/BXUSkghTcRUQqSMFdRKSCFNxFRCpIwV1EpIIU3KU0zMzN7EcH8Lpnm9mefr+uSDcU3KVjZvaMmZ2X03N9ysw+nsdzdWtQJ5GWNhxiZreY2R4zezU41rcGj70a+jpgZrOhn99rZpvMbM7Mvhd8fcPM/qeZvWWQv5P0l4K7SDF9BFgDnAkcCZwNfBXA3Y9ofgHPAReHtt0Z/P+t7n4kcAzw88APATsU4IeHgrt0xMw+C6wAvhT0GH8z2H6Wmf0/M5s2s11mdnaw/ZigF3px8PMRZvakmb3fzK4G3gv8ZvBcX0rx+oea2X83s+fM7Jtm9vtmNho8dnbwWteb2bfM7EUz+7eh//v9ZvYlM/uumW03s4+b2d8Gj/1NsNuuoC3rQ/8v8vla2rXezCZbtl1nZtuC7y8ys8eCHnXdzD4c8yv+FPBFd3/BG55x98+0Oy6t3H3O3XcD64G9wPVZn0NKyt31pa+OvoBngPNCP48DLwEX0eg4vDP4eXnw+PnAvwA/APwB8PnQ//0U8PE2r+fAjwbffxLYRqNneiTwJeDm4LGzgXngJqAWtGcGODp4/K7gaxnwVuB54G+jXifN87W0cRnwPeDk0LbtwJXB9y8CPxN8fzTw9pjf9WM0euW/DpwGWJr3INi2CfjjiH1vAr4y6M+NvvrzpZ675Ol9wP3ufr+7H3D3LwOTNIIh7v4gcA/wl8G2f9/Ji5iZAVcD17n7y+7+PeC/AVeGdpsDbvJGz/V+4FXgFDMbAS4HbnT3GXd/DPh0ipeNfL7Wndx9BvhT4KqgrScDq2iciJrP81Yz+z53f8XdvxrzejcDn6BxRTMJ1M3sAynameQFGidDGQIK7pKnE4FfDFIy02Y2Dfw0EM7z3g78BPApd3+pw9dZTqOHvCP0On8ebG96yd3nQz/PAEcE+yyl0VtvCn8fJ+75onyOILgD7wEmgqAPjRPLRcCzZvbXZvavop7A3fe7+23uvhYYA34buMPMfjxFW+OMAy938f+lRBTcpRutS4o+D3zW3cdCX4e7+2aAoNd8O/AZ4NdbKlKyLE/6bWAWODX0Okd5Y4Cxnb00UizHh7adkOG10/gysNzMTqcR5D/XfMDdt7v7pTRSUxPA3e2ezN1n3f024BUaaaTMzGwJcDHwfzv5/1I+Cu7SjW8CPxz6+Y+Bi83sAjMbMbPDgsHNZiD9LzSC+K8AW4DPBAE/6rliufsBGjn7T5rZDwCY2biZXZDi/+4HvgBsMrNlZrYKeH+b3ysTd5+jkX7aQiMN8uWgjYcEpYpHBft8FzgQ9Rxmdm1w7EbNbGmQkjkSmMrSluD//jjwJzQqZn63099LykXBXbpxM/CxIDXyYXd/HriURhDfS6MnvwFYYmZnAB8C3h8E2E/QCPQbg+f6Ixq56Gkzm0jx2v8ZeBJ41My+C/wFETnwGNcAR9EY3P0sjcD3RujxTcCng7ZckfI5W30OOA+4pyWd80vAM0Gbf41GTj3KDHBL0MZvA78BXO7uT6V8/fVm9irwHRr5/peAM9z9hcy/iZSSuetmHTLczOwTwA+5e7cDliKFoZ67DB0zW2VmP2kNZwIfBL446HaJ5GnpoBsgMgBH0kjFHEcjv34LjfJFkcpQWkZEpIKUlhERqaCBpWWOPfZYP+mkkwb18iIipbRjx45vu/vydvsNLLifdNJJ
TE5Ott9RREQOMrNn0+yntIyISAUpuIuIVJCCu4hIBSm4i4hUkIK7iEgFKbiLiFSQgruISAUpuIuIVJAWDiu4iak6Wx54ghemZzlubJQNF5zCutXjg26WiBScgnuBTUzV+cgXvsbs3H4A6tOzfOQLXwNQgBeRRErLFNiWB544GNibZuf2s+WBJwbUIhEpCwX3AnthejbTdhGRJgX3AjtubDTTdhGRJgX3AttwwSmM1kYWbButjbDhgrT3gRaRYaUB1QJrDpqqWkZEslJwL7h1q8cVzCtIJa7SawruIn2mElfpB+XcRfpMJa7SDwruIn2mElfph1TB3cwuNLMnzOxJM9sY8fgKM3vYzKbM7B/M7KL8mypSDSpxlX5oG9zNbAS4DXgX8FbgKjN7a8tuHwPudvfVwJXA/8q7oSJVoRJX6Yc0A6pnAk+6+1MAZnYXcCnwWGgfB74v+P4o4IU8GylSJSpxlX5IE9zHgedDP+8B3tGyzybgQTP7j8DhwHlRT2RmVwNXA6xYsSJrW0UqQyWu0mt5DaheBXzK3Y8HLgI+a2aLntvdb3f3Ne6+Zvny5Tm9tIiItEoT3OvACaGfjw+2hX0QuBvA3f8OOAw4No8GiohIdmnSMtuBk81sJY2gfiXwnpZ9ngN+DviUmf04jeC+N8+GihSNZplKkbUN7u4+b2bXAA8AI8Ad7r7bzG4CJt19G3A98Admdh2NwdVfdnfvZcNFBimvWaY6QUiv2KBi8Jo1a3xycnIgry3SrbWbH6IeMelofGyURzaem+o5Wk8Q0CiJvPmy0xTgJZaZ7XD3Ne3209oyIh3oZpZps7cedXKYndvPpm271ZuXrim4y9BLmxoJ77fEjP0RV73tZplG9dZbTc/OMT07B2hRMemc1paRodYMtvXpWZw3g+nEVD1xv6jAnmaWadSiYe1oUTHphIK7DLW0KzTGBeURM4xGrj1NrrzTxcG0qJhkpbSMDLW0ufO4/Q648/Tmd6d+vePGRiNz7dA4Qczsm+eVmbnI/yeShXruMtTSrtCY10qOcYuG3br+dB7ZeC43XnxqqkXFJqbqrN38ECs33sfazQ8tSiOJKLjLUEu7QmNeKzmuWz3OzZedxvjYaGQ6p93jkH6cQIab0jIy1LKs0HhYbcnBvPvYaI1Nl5yaqoIlqhonqRa+3aJiSeMEqqiRJgV3GXrtgmlU+eIb8wdSPXcv7peqOzlJGkrLiLTR6T1PJ6bqXH/3rtzvlxqX519iphy8HKTgLtJGJz3lZo89qh4+6v9mGSCNyv9Do/ZeOXhpUlpGpI248sVwD7o1rz6zbz5xslLr/82SumkdJ4iaLascvCi4SyXltdrixFSd196YX7Q9XCkTFZyTtFbZxKV9rr97FxAf4JvbV268L/J1lIMfbgruUjl5LscbtQ7M0ctq3HjxqQt60FmWFAjn3NetHo8NwvvdU7W73ZVF1Imu2W4tTlZdyrlL5XQ6AJrmeQCWHbJ0QSDspIcczosnTYRK0+6oHLwB56xaHlkTv+Hzu9hwzy7VyVecgrsMVC9mWuZVKpj2eeKC89ho7eBkpBGzRY83A3fcAGm7djStWz3O5WeME34FB+7dUWfTtt2LTlBz+525A9E5eqkOpWVkYHpRAw7pBkDzfJ4NF5wSedON8CSnpLx4c5/r794VWV1z1GiNtZsf4oXpWY4arWEG0zNzC9IpDz++l9b/OTu3P1O6qN1YgZSLeu4yMHmlT1p1s1RA+EritTfmqY0s7HFHPU+aJQParU2zbvU4t1zxtkXtri0xXts3fzCFMj07xyszc4vSKXkMnhooNVMhus2eDMzKjfct6m1CI8hkWWkxSusg4jmrlvPw43sTBxA/NvE17nz0uQVtqi0xjjhs6aKeciftSXNLvaiSyqhVIsNGYm4cAo3B39fnDix43dqIMbc/ev8stwmUwdBt9qTw8kqfRAmXCsalfyaffflgwB9bVosMonMHnGWHLGXqhvO7bg+0r1BpXQoh
Lp0TFhfYR2sj3HjxqZGve+3WnZH/R+WT1aHgLgMTl6vOutJiO3Hpn3AvPal3nFfAa7eGTZSk9d+TjLecPFpfN+4erseNjeY2R0AGS8Fdei4uWGRZkbEbccE5bUJyiRkTU/XMNfJ5/F5RJ8B2DBalVqLSVPfuqC86sZ6zanlPBrml/5Rzl55KOxGo0+dOE0DXbn6o60qQcH683eumza+n/f2ytr01bx7XnsvPGF80DhH3esrFF4dy7tIX7QJd3ESgV2bmuG7rTiaffZmPrzuto9dN28OM6v0a6XvusLCKp93r5rHeetxJsZ3WZRHigvXs3H4efnzvooB9nXLxlaFSSOlYmjsCJQUFB+589LlM5XfNUsVrt+5MXUbZWqo4Nlpj2SGLJw3VRoyx0Vrsa78wPZuqfDOPSVRplzQ4elktsgQz/N7EiWpP3GC2g5YSLhn13KVjaXqo7QYEPXie5vMlpVjS9GbjAmgzxx9V7ggL76wUl8Y5bmy0beCemKpHrtLY/P9ppTkRNKthoq4G0pwcotqTlONX/r1c1HOXjqXpobabWg9vBo12a52kCVjNwc8oE1P1yMAOcPihb64XkzQJKmkyUtIa7lmrgOJeZ8QsdqJUWLuTQ1x7wlc5UbRMQXkouEvH2s26hDeDRVK6Y8QsVYolTW92vzvXbt3J6psejDw5xOXZw8+dNOM0KfDHnXxGzDIPpsa9zi1XvI2nN7+bRzaeG/t8zauHOO1ODOtWj/PIxnOJewbl38tBwV06FheAzlm1fMFiYAA7bzyf9521YlHAGK2NpL5bUZa0xiszc5ny/63P3QxwrYE0KfDHPf8B98xpjDRLGkRpd/XwvrNWAI2B03Y59DQnbyku5dylY1F16q310+E87cfXncaaE49ZlFtPmlATlrXmO23+34LnzvJ7RwXZvGfcdjLpKenq4fIzxmPfm6jX6dckM+kNBXfpSmsAWrv5ocRB1riAlSaIRJ1MXntjnunZ+Nml9enZgxOQ4koi33vWilwGCIsQDJOuHh5+fG/iexNV1nrzZadptmpJKbhLrjopA8wyU7X15JCmgiaqDr0XwapfM26TJF09JL03cfMGbr7stLaTl7RcQTFphqrkKq6MMMsMx6wrOk5MNW5KkdSD7+UMyyIFt6TZsUmzTyF6PffmcYv7HfOajSvppZ2hqgFVyVU3a6lD9MSoP370ucQyyXWrx9l54/ncuv702OftVYVHmolc/dRppU+aXn3U79irNfmle+q5S+666cmmXQcmrieex5VDFv1+vW7FvTdJvwdE9+qPjlkmGfJZk1+iaW0ZGZhOqjya0vaw4/br96BmXvdr7Ze49ybpuMWtN5O0TLLKJQcvVVrGzC40syfM7Ekz2xizzxVm9piZ7Tazz+XbTBkWaYNC3H6d1od3qiq14EnHLevvonLJYmjbczezEeA24J3AHmC7mW1z98dC+5wMfARY6+6vmNkP9KrBUl0TU3Vm9s233c+Ac1Ytj328myuHrIpQ/piXLL36JBpMLYY0PfczgSfd/Sl33wfcBVzass+/A25z91cA3P1b+TZTqq45aNd6qT82WmPtjxyzYGarA/fuqBdihcJ+XykMQtTvGLecxPjYaKV+9zJrO6BqZr8AXOjuvxr8/EvAO9z9mtA+E8A3gLXACLDJ3f884rmuBq4GWLFixRnPPvtsXr+HlFzcgF7SzZ9HzDjgPvDyw2GkEsjB6feA6lLgZOBs4Hjgb8zsNHefDu/k7rcDt0OjWian15Ye6HftdtwAZFxgDz+WNI2+SDXoZdLuuBVhwpYkSxPc68AJoZ+PD7aF7QG+4u5zwNNm9g0awX57Lq2Uvspyl6O8dHoj6KaoOx0N4veogrTHrZ9jG5Jdmpz7duBkM1tpZocAVwLbWvaZoNFrx8yOBX4MeCrHdkofDWJiSpp139tp7f1rgk1ndNyqoW3P3d3nzewa4AEa+fQ73H23md0ETLr7tuCx883sMWA/sMHdX+plw2Wx
vFIQg6jdbr3Mj7ubEcTn4VtL9spWg14UOm7VkCrn7u73A/e3bLsh9L0DHwq+ZADyTEHkvXRtWuHL/KQBO0i3iuSgfo+y03GrBq0tUxF5Xkp3uz5MHpJKDNOWHxbh9yijdseteZPy5s1YilCSKotp+YGKSHMpnTZtk6YSoh9VKEkDdmkG81TR0Zmk46ZB6vLQwmEV0W4BqzzrklXjXC1ZTtSDWChN5awLacnfIdPuUjrPtE0ez6VL+2LIumRxvwdbi7akcpkouFdEuzx0nn+U3T6X/mCLI+uJut8Lpakss3PKuVdIUh46zwqIbp8r6Q92mC+3ByHriTppobRepE9Ultk59dyHRNrKkTTpkm6rUPQHWxxZe+JxV4hAT67GqrKk8iCo5z5EDqstOdjjGhutsemSUzuart9tFYrqqIujkyWLo64Q125+qCdXY1VaUrnfFNyHQFR1yxvzBxbtlyVd0s26IvqDLY68ykV7dTWmctbOKbgPgbRBu1/pEv3BFkuWE3VcXr2XV2NaoKwzCu5DIG3QzvsPNGmATX+w5ZOUttPVWPFoQHUIpB2UynO6vsodq6fdFWDV70hVNuq5D4G0vao80yUqd6yedleAUVdjml06OAruQyBL0O42XdL8Y4678YbKHcsra9pO69AMloL7kOhHjjuqKqeVyh3LK0tefWKqzvV371q07v7s3H6uv3sXoADfawruQ6hXl8pRqZgwDbCVW9orwOZJPu5mK/vd1YPvA60KOWTiVnS8/IxxHn58b8cBf2KqzrVbd8Y+Pq5869CIWzmy1dhojcMPXTrQZaXLKO2qkOq5l1CWD33rvq+9MR850Hnno8/RPM1nzY02TxhxerkcrBRP2nGV6dk5pmfngMWfOeXru6fgXjJZPvRR+8ZpvX5LqmxpPWHM7Ft8wmhSKmb4xA28thP+zKnaqnuqcy+ZtEugNge0knLg7bT2wCam6qy+6UGu3bpzQf36KzNzsc+hWufhEzVfIq3mZ06Ly3VPPfeSSXs7vaQBrVbG4p47wNiy2sH8adw+ScbHRhXYh1DUwOs5q5YvGNOZ2Tcf2SlYYsbEVF2Ly+VAwb1k0nzo21WtHL2sxrJDllKfnmXEjP3ui4J3bcR49fU3/wCzBnalY4Zbu9LbuLLZZiXN5WeMc++OupYz6ILSMiWTZomApEvX0doIN1586sHnafbunUYPHho97sMPWcrcgfQhfWy0pqnnklpzuYIRs0WPzc7t5+HH92o5gy6pFLKEWgc0Wy95X3tj/mAVQtiIGbdc8TbWrR5ve6PjlRvvS91b182xpVNxnzMDnt787n43pxRUCllh4UveqIqY2ohRW2ILet6tAbhd7j5txUPUTT9E0lJuvXeUlim5qPz63H7niMOWJl7Sxv3xNAe02lU8HL2sxq3rT2fnjecrsEvH8lyJVBZSWqbkOr2sTVoHptnLBw4uAtYceNVMU8lbOM04tqyGO3xndk6zUmMoLTMkOr2sbf7BxC3utOWBJ3hk47n6w5LcxM2sbn5pVmq+lJYpuW4ua9etHudAzJWbJotIJyam6qzd/BArN97H2s0PHbw5S9TNW67dupPTf+vBg/uknaAn6ajnXkBZ1o7p9gYbGtCSvCT1vOPmXkzPzh3cR7NS81XK4F7l1eI6uTTtZq123ftS8pLU804K0M191NHIV+mCe5nyclEnIUjuZfd7waQ8b60nwy2p592utPaF6Vk+uf50dTRyVLrgXpbV4qJOQhs+vwucg/XnUSemdpemvbhq6cddmqT6knreUVeIrfuoo5Gv0gX3suTl4urPW7WemJL+QKJOGNdt3cm1W3eqRFEGLinF1/xc/taXdi9aMCzcO1dHIz+lq5aJy78VLS+X5WQT3jep+iXqhNF6g41m5YFIvzXXi4mbPLdu9ThTN5zPretPz33NmLgqnWFWuklMcbeJK9raJmlvNQZvrtIYt1ZMs+eTZr0X3fVIqiDLeFVZYkJecp3EZGYXAr8HjAB/
6O6bY/a7HPg88FPu3pPpp/3My3WT3466RK2N2IKce3NbeGnd+vQs9+6oR34w06z3UrT0lEhWkeNV9+wCezO12a7MsojjcP3WtuduZiPAN4B3AnuA7cBV7v5Yy35HAvcBhwDXtAvuRV9+II/eQJreR9wKjlE3DwYSB6VAPXcpvyxXveNjo7wQTIxqVdWVJdP23NPk3M8EnnT3p9x9H3AXcGnEfv8V+ATweqaWFlQes+XWrR7nkY3n8vTmdx/MmV+3dScAn1x/Oo9sPJfvRAR2aEzuCM/ma/ZSmjlNeHP99SaVjUkVZB2vKss4XL+lScuMA8+Hft4DvCO8g5m9HTjB3e8zsw1xT2RmVwNXA6xYsSJ7a/soz6qcuNr8yWdfZkmwIFc7Ueu9VHkylwyX8Gc57d8ExJdZqqOTQymkmS0Bfhf45Xb7uvvtwO3QSMt0+9q9lOdsubirgDsffS7T7etaTywqG5MqaO38RAX22hJbkHNvmtk3DzSuaOM6OsPaCUoT3OvACaGfjw+2NR0J/ATwV9a4ZdYPAdvM7JJeDar2Qze9gdYPU1z+MOvZbdgvM6Wa4tadGTHjgPuCMadN23YvGKN6ZaaxNs3Nl50WOdZUphnteUsT3LcDJ5vZShpB/UrgPc0H3f07wLHNn83sr4APlzmwQ+dVOVEfptabT3eitsSY2TfPyo33DVXvQ6ovLtV5wH3RgOiWB55YVICQVBkzzJU0bYO7u8+b2TXAAzRKIe9w991mdhMw6e7bet3IQekk7RE30ag1wKcJ+M19xkZrvLZvYbnksPQ+pPqypECzjoVl2V619E2qGarufr+7/5i7/4i7/3aw7YaowO7uZ5e9196NuA9TOJAfvazGe89asWgmam3EGButHZy598n1p/PM5ndz+KFLF+Uatc61VEWWexJkrYxJuz1qvfmyz/gu3doyRZdmotHrcwdYc+IxrDnxmFQ9hbKspyPSiSwp0KxjYWn3r2L6RsE9Z+1Wv4Pst7HTOtdSdWlToFnHwtLuX8UOlIJ7zlo/THF59SwfGtXxirwp61hYmv2r2IEq3aqQZRCemTqew+y5dqvtiUh3urkXcVGVblXIommOsNenZxkJZtaF11aPW6Pm8jPGI1d+FJHBKEu1TK6rQspiE1P1RTceaM6siypVDH9ozlm1nHt31HsysaIsH1CRoqnajG/13DsQ1RuPErdCY9yqd92u6Dhs61qLFEG/O1R5rgopLeKmS7fKa8JFN+1SPbxI7xS5Pl7BvQNpg3BeEy7SqmI5l0iRFblDpeDegTRB2IBzVi2PfKxXI/Na11qkv4rcoVJw70BUcG7lwL076pGXZ70qbaxiOZdIkkHfGLvIHSpVy3QgqgJmJrSwV1PS9OVejMz38/6yIoNWhOV8izzBUNUyOVm58b6huo+jyKD1quosq6JWy6jnnpMqTl8WKbKi5LuLWh+vnHtOlO8W6a8i57uLQME9J1r/RaS/1KFKVrm0zCCn3xf18kykijopIBim5TkqFdyLMHouIv2TpUM1bPGhUmmZvGaLDbp2VkTyV+TZpL1QqZ57J6PnrZdpvVyxUUQGpyjVNf1SqZ571tHzqEV/7nz0uaE6u4sMi2GrrqlUcM86eh51mZbHbfFEZHDi0qrnrFqOtexb5eqaSqVlso6eZwnYx42NMjFVZ9O23UzPNpYZOHpZjRsvPlXpGpGCiBs0nXz2Ze7dUV/QeTPg8jOqW+E2tMsPTEzVuf7uXQfvnhRmLOzBN2+Lt/Xvn2fuwML9ayPGll94W2U/ICJlErckQfMWmFHGS1YSqZt1JGie3aPe7NHaCO89a8WiyUgPP753UWAHmNvvyseLFETc1XhcYId0N9goYwVdpdIyacXdSWnELHZW6XVbd8Y+n/LxIsUQt8ZTUs8dkldwLWt9/FD23OOC8QH32DcraUS9qqPtIkUV15OOK6q46h0ntL0HQ1xcKGt9/FAG905KojZccAq1Ja1j7Y2ce1VH20WKKOm+
pXFrPH183WkHt8eJ+/sva338UAb3ThYcWrd6nC2/+DbGRmsHtx29rKbBVJE+a9eTXrd6nEc2nssn158ONFKqazc/BMAjG8/l1vWnL/r7r40Yr70xH5lTL2t9/FDm3LOWTLbOYt10icofRQYlTU86TZ68+Tc9tqzGq6/PHyxxbt23yHdbSjK0pZBx2i1HAI03Vsv5igxG0h2YNlxwClseeCLy8eY+rXdpSnNHpyKtJqk7MXUg6mx/56PPLZq1mjSyLiK9FdeTPmfV8kXbW0X1+tNcCZRxOe+hzLnH0XIEIsUXN2j68ON7EwM7ROfJy5pTb0c998DEVD32Ui5K2d94kTKL6kknzUWB+Dx5WXPq7QxVcI/LmzXTMXGiliMo+xsvUjVxE5ggeYmBTu7oVAZDE9yTRs/jZqzCm+vKPPz43kq98SJVE9cDT1P8UMacejupgruZXQj8HjAC/KG7b255/EPArwLzwF7gV9z92Zzb2pWk2tik/LmqYkTKoao98E61De5mNgLcBrwT2ANsN7Nt7v5YaLcpYI27z5jZfwB+B1jfiwZ3KmlEPO5ybnxsdGg/GCJlVMUeeKfSVMucCTzp7k+5+z7gLuDS8A7u/rC7zwQ/Pgocn28zu5c0It7JjFURkSJLE9zHgedDP+8JtsX5IPBn3TSqF5ICeFxplXoAIlJWuQ6omtn7gDXAv455/GrgaoAVK1bk+dKRWqtjkgZGdTknIlWSJrjXgRNCPx8fbFvAzM4DPgr8a3d/I+qJ3P124HZoLD+QubUZRFXH3Lujrh65iAyFNGmZ7cDJZrbSzA4BrgS2hXcws9XA/wEucfdv5d/M7Mq6BrOISB7a9tzdfd7MrgEeoFEKeYe77zazm4BJd98GbAGOAO4xM4Dn3P2SHra7rbKuwSwi/VOkBcHylirn7u73A/e3bLsh9P15Obera2PLarwyM7dou5YNEBEo7+3z0qrkwmETU3VefX0+8rHX3pgvxc1tRaS3qp66reTyA1seeIK5A9HjtdOzc2y4Zxe/9aXdTM/MVe5STETSqXrSh6BtAAAIkElEQVTqtpI993ZvztwB55WZuUX3XxSR4VHVpX6bKhncs745VboUE5F0qj4zvZLBPepNa6cql2Iikk7VZ6ZXMuceXh2uPj27aD32KFW5FBOR9JJmppe9TLKSwR0WvmnhN+mo0Rqv7Ztnbv+b4b5Kl2Ii0r0qlElWNriHtZ6dy35GFpHemZiqc/3du9jvC6/3m2NzZYkVQxHcW2mRMBGJ0uyxtwb2pjKNzZUquKvHLSK9lHTLTUgemytafCpNcG+XAyvagRWR8knqmSeNzRUxR1+aUsikqcLNA1ufntXEJBHpWFzPfMQssUyyiEsZlCa4J00VLuKBFZHyiZvYdMsVb0vsgRdxKYPSBPekqcJFPLAiUj6dTmwq4lIGpQnuSVOFi3hgRaSc1q0e55GN5/L05nfzyMZzU+XMi7iUQWkGVMOzTqMGTcODGTD4Aysiw6NdfBoE85h6zl5bs2aNT05O5vZ8qpYRkWFgZjvcfU27/UqTlkmiwC4islBp0jJxilhfKiIyaKXvuasMUkRksdIHd5VBiogsVvrgrjJIEZHFSh/ci1hfKiIyaKUfUC1ifamIyKCVPriD1mcXEWlV+rSMiIgspuAuIlJBlUjLiIj0UxlmxSu4i4hkUJZZ8UrLiIhkUJZZ8QruIiIZlGVWvIK7iEgGZZkVr+AuIpJBWWbFa0BVRCSDssyKV3AXEcmoDLPilZYREakgBXcRkQpKlZYxswuB3wNGgD90980tjx8KfAY4A3gJWO/uz+Tb1MXKMEtMRMotLs6k2T62rIY7TM/OMWLGfveD/473OGaZuyfvYDYCfAN4J7AH2A5c5e6Phfb5deAn3f3XzOxK4OfdfX3S865Zs8YnJyc7bnjrLDFojFjffNlpCvAikou4OHP5GePcu6OeanuSTmKWme1w9zXt9kuTljkTeNLdn3L3fcBdwKUt+1wK
fDr4/vPAz5mZpW5tB8oyS0xEyisuzvzJV55PvT1JL2NWmuA+Djwf+nlPsC1yH3efB74DfH/rE5nZ1WY2aWaTe/fu7azFgbLMEhOR8oqLJ/tjMh5x2zt5jW71dUDV3W939zXuvmb58uVdPVdZZomJSHnFxZORmMRE3PZOXqNbaYJ7HTgh9PPxwbbIfcxsKXAUjYHVninLLDERKa+4OHPVO05IvT1JL2NWmmqZ7cDJZraSRhC/EnhPyz7bgA8Afwf8AvCQtxup7VJZZomJSHklxZk1Jx7Tdnuhq2UAzOwi4FYapZB3uPtvm9lNwKS7bzOzw4DPAquBl4Er3f2ppOfstlpGRGQYpa2WSVXn7u73A/e3bLsh9P3rwC9mbaSIiPSGZqiKiFSQgruISAUpuIuIVJCCu4hIBSm4i4hUkIK7iEgFKbiLiFRQqklMPXlhs73AswN58WjHAt8edCMyKFN7y9RWKFd7y9RWKFd7i9rWE9297eJcAwvuRWNmk2lmfRVFmdpbprZCudpbprZCudpbprZGUVpGRKSCFNxFRCpIwf1Ntw+6ARmVqb1laiuUq71laiuUq71lausiyrmLiFSQeu4iIhWk4C4iUkFDGdzN7AQze9jMHjOz3Wb2n4Ltm8ysbmY7g6+LBt1WADN7xsy+FrRpMth2jJl92cz+Kfj36EG3E8DMTgkdv51m9l0zu7Yox9bM7jCzb5nZ10PbIo+lNfwPM3vSzP7BzN5ekPZuMbPHgzZ90czGgu0nmdls6Bj/fgHaGvu+m9lHgmP7hJld0M+2JrR3a6itz5jZzmD7QI9tR9x96L6AtwBvD74/EvgG8FZgE/DhQbcvor3PAMe2bPsdYGPw/UbgE4NuZ0S7R4B/AU4syrEFfhZ4O/D1dscSuAj4M8CAs4CvFKS95wNLg+8/EWrvSeH9CtLWyPc9+HvbBRwKrAT+GRgZdHtbHr8FuKEIx7aTr6Hsubv7i+7+1eD77wH/CJTt5quXAp8Ovv80sG6AbYnzc8A/u3thZiK7+9/QuBVkWNyxvBT4jDc8CoyZ2Vv609KGqPa6+4PuPh/8+CiNm9YPXMyxjXMpcJe7v+HuTwNPAmf2rHERktprZgZcAfxJP9uUp6EM7mFmdhKNe79+Jdh0TXC5e0dRUh2AAw+a2Q4zuzrY9oPu/mLw/b8APziYpiW6koV/HEU8thB/LMeB50P77aF4nYBfoXF10bTSzKbM7K/N7GcG1agWUe970Y/tzwDfdPd/Cm0r4rGNNdTB3cyOAO4FrnX37wL/G/gR4HTgRRqXZUXw0+7+duBdwG+Y2c+GH/TGdWOhalrN7BDgEuCeYFNRj+0CRTyWcczso8A8cGew6UVghbuvBj4EfM7Mvm9Q7QuU4n2PcBULOyZFPLaJhja4m1mNRmC/092/AODu33T3/e5+APgD+nyZGMfd68G/3wK+SKNd32ymCIJ/vzW4FkZ6F/BVd/8mFPfYBuKOZR04IbTf8cG2gTOzXwb+DfDe4IREkOJ4Kfh+B4089o8NrJEkvu9FPrZLgcuArc1tRTy27QxlcA/yaX8E/KO7/25oezif+vPA11v/b7+Z2eFmdmTzexqDaV8HtgEfCHb7APCng2lhrAU9nyIe25C4Y7kNeH9QNXMW8J1Q+mZgzOxC4DeBS9x9JrR9uZmNBN//MHAy8NRgWnmwTXHv+zbgSjM71MxW0mjr3/e7fTHOAx539z3NDUU8tm0NekR3EF/AT9O49P4HYGfwdRHwWeBrwfZtwFsK0NYfplFVsAvYDXw02P79wF8C/wT8BXDMoNsaavPhwEvAUaFthTi2NE44LwJzNPK8H4w7ljSqZG6j0Uv7GrCmIO19kka+uvnZ/f1g38uDz8hO4KvAxQVoa+z7Dnw0OLZPAO8qwrENtn8K+LWWfQd6bDv50vIDIiIVNJRpGRGRqlNwFxGpIAV3EZEKUnAXEakgBXcRkQpScBcRqSAFdxGRCvr/pwbJqxPcJpEAAAAASUVO
RK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f24206fe630>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.title(\"text length vs STD\")\n",
    "plt.scatter(list(text_vs_std.keys()), list(text_vs_std.values()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.collections.PathCollection at 0x7f24205f2358>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEICAYAAABRSj9aAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X2UHHWd7/H3N0MDE0SGyCxLhodExHBFlwTmetkbdQEfIrBClJWH9YFV76LnrucsrEbD6lmz6F2jWRZ3r3vVsHIQQQwajFG8C6xh17vcjTgxAwEhF5DwMAYYgQExI06S7/2jqiY1PVXV1c/d1Z/XOXOmu7q7+tfVM9/61ff3ZO6OiIgU15x2F0BERJpLgV5EpOAU6EVECk6BXkSk4BToRUQKToFeRKTgFOilKczMzewVbXjfU83s8Va/by3M7C/N7J/aXQ4pPgX6HmFmO8zsTQ3a1zVm9plG7Kte7TqhpDGzM83sG+Hta83s7LTnuvvfuPt/q/P9FoTHYL969iPFpkAv0lgnAyOx2z9tY1lEAAX6nmBmXweOBr5nZi+Y2cfC7aeY2f81swkzu8vMTg23zzOzx83sbeH9l5jZg2b2XjO7GHgX8LFwX9/L8f4HmNnfmtmjZvakmX3ZzPrDx04N3+sjZvaUme00s/fFXvsyM/uemT1vZj8xs8+Y2b+Hj/0ofNpdYVnOj70ucX9l5TrfzEbKtl1qZhvD22ea2c/M7FdmNmZmH81xuIeBLWZ2EDDP3VPTSGa2ysyuC29HNfOLwuP0SzP7ROy5rzWzkfA4PGlmfxc+FB2DifAY/L6ZHWtmm8zs6XA/15vZQGxfO8zso2Z2t5k9Z2brzOzA2OPnmNlo+F4Pmdlbw+2HmNlXw2M6Fn4XfeFjrzCzfwv390szW5fjWEmruLt+euAH2AG8KXZ/CHgaOJPghP/m8P5g+PhbgCeA3wGuAr4de+01wGcqvJ8DrwhvXwlsBOYBBwPfAz4bPnYqsBu4HCiF5dkFHBo+/s3wZy7wKuAx4N+T3ifP/srKOBf4FXBcbNtPgAvC2zuB14e3DwVOyvi824EJYA/wHPACMBVu+0rKa1YB14W3F4Sf5SqgHzgReBH4T+Hj/wG8J7z9EuCUstftF9vvK8Lv8wBgkOBk8IWyv4U7gfnhd3If8KHwsdeG5X9z+HcxBBwfPvYd4CvAQeHfxZ3AB8PHbgA+Eb7mQOB17f6b10/sb63dBdBPi77o2YH+48DXy55zC3BR7P7/BLYBY8DLYtuvIWegBwz4NXBs7LHfBx4Ob58KTJYFqqeAU4C+MFguij32GSoH+sT9pZTzOuCvwtvHEQT+ueH9R4EPAi/NeYzfBHwnvL0WeGeF569idqA/Mvb4new76fwI+GvgsLJ9zAr0Ce+zHNha9rfw7tj9zwNfDm9/BbgyYR+HE5x4+mPbLgRuD29fG37mI9PKoZ/2/Sh107uOAd4Zpm0mzGwCeB1wROw5a4FXA9e4+9M1vs8gQc15S+x9/jncHnna3XfH7u8iqLUOAvsR1OIj8dtp0vaX5BsEAQvgj4EN7r4rvH8uwRXBI2Fa4veTdmBmnw8/183AW8LbHwCuMrMncpQ3Lv78eLk/ALwSuD9MYf1h2g7M7HAz+2aYXnme4GR2WM73OQp4KGG3xxBcIe2MfY9fIajZA3yM4KR+p5nda2bvr/RBpXUU6HtH+TSljxHU6AdiPwe5+2qAMPe6lqCm9t/LerZUM+XpLwlq2CfE3ucQd08LvHHjBGmYI2PbjqrivfO4DRg0s8UEAf8b0QPu/hN3P4cgmG0Abkzagbt/zN0HgIcJrmL+APiP8LP+biMK6e4PuPuFYVk+B3w7bAdI+i7+Jtz+Gnd/KfBugiCcx2PAsSnbXyS4ooi+x5e6+wlh+Z5w9z919/kEV0H/yzqoN1SvU6DvHU8CL4/dvw54m5ktM7M+MzswbBiNgupfEgSL9wNrgGujhreEfaVy970Eeecrzex3AMxsyMyW5XjtHuAmYJWZzTWz44H3VvhcVXH3KeBbBJ9xHkHgx8z2N7N3mdkh
4XOeB/am7cfMDgYOdvedwEns63nTEGb2bjMbDI/nRLh5L8HJcC8zj8HBBG0Ez5nZELCiirf6KvA+M3ujmc0Jv6vjw891K3CFmb00fOxYM/uDsHzvjP3tPEvwt5N6vKS1FOh7x2eBT4aX3R9198eAcwgC+jhBjW0FMMfMTgb+AnhvGGw/R/CPuzLc11eBV4X72pDjvT8OPAhsDlMJ/wIsylnuDwOHEKQavk7Q6Pdi7PFVwNfCspyXc5/lvkGQX/9WWcrnPcCOsMwfIuhtlGYJMBrePgnYUmNZ0rwVuNfMXgD+niB3Pxmmmf4HcEd4DE4hyOWfRNCoejPByTIXd78TeB9BA/pzwL8RpG0gOMnuD/yMIJh/m32pvv8M/Dgs30bgz93953V8Xmkgc9fCI9I9zOxzwO+6+0XtLotIt1CNXjqamR1vZr9ngdcSNEp+p93lEukmGjYtne5ggnTNfIJ8/BXAd9taIpEuo9SNiEjBKXUjIlJwHZG6Oeyww3zBggXtLoaISFfZsmXLL919sNLzOiLQL1iwgJGRhnY7FhEpPDN7JM/zlLoRESk4BXoRkYJToBcRKTgFehGRglOgFxEpuI7odSMi2TZsHWPNLdv5xcQk8wf6WbFsEcuXDLW7WNIlFOhFOtyGrWNcdtM2Jqf2ADA2McllN20DULCXXCqmbszsagsWWb4ntm1duHjwaLjQ8Gi4fYGZTcYe+3IzCy/SC9bcsn06yEcmp/aw5pbtbSqRdJs8NfprgC8SrDQEgLufH902sysI5q2OPOTuixtVQJFe94uJyaq2i5SrWKN39x8BzyQ9ZmYGnEcwu6CINMH8gf6qtouUq7fXzeuBJ939gdi2hWa2NVxM+fV17l+k561Ytoj+Ut+Mbf2lPlYsy7tIl/S6ehtjL2RmbX4ncLS7Px0uR7fBzE5w9+fLX2hmFwMXAxx99NF1FkOkuKIGV/W6kVrlmo/ezBYA33f3V8e27QeMASe7++Mpr/tX4KPunjlj2fDwsGtSMxGR6pjZFncfrvS8elI3bwLujwd5Mxs0s77w9suB4wAtECwi0kZ5ulfeAPwHsMjMHjezD4QPXcDsRtg3AHeH3S2/DXzI3RMbckVEpDUq5ujd/cKU7X+SsG09sL7+YomISKNorhsRkYJToBcRKTgFehGRglOgFxEpOAV6EZGCU6AXESk4BXoRkYJToBcRKTgFehGRglOgFxEpOAV6EZGCU6AXESk4BXoRkYJToBcRKTgFehGRglOgFxEpOAV6EZGCU6AXESk4BXoRkYLLszj41Wb2lJndE9u2yszGzGw0/Dkz9thlZvagmW03s2XNKriIiORTcXFw4Brgi8C1ZduvdPe/jW8ws1cBFwAnAPOBfzGzV7r7ngaUVaRlNmwdY80t2/nFxCTzB/pZsWwRy5cMteV9WlUWKa6KNXp3/xHwTM79nQN8091fdPeHgQeB19ZRPpGW27B1jMtu2sbYxCQOjE1Mcsm6UZZcfisbto419X0uu2nbjPfI8xyRSurJ0X/YzO4OUzuHhtuGgMdiz3k83CbSNdbcsp3JqdkXoc/umuKym7bxyQ3bWLp6EwtX3szS1ZtqDrpJ7zM5tYc1t2yv6jkildQa6L8EHAssBnYCV1S7AzO72MxGzGxkfHy8xmKINN4vJiZTH5uc2sP1mx9tSA077X3i2/M8R6SSmgK9uz/p7nvcfS9wFfvSM2PAUbGnHhluS9rHWncfdvfhwcHBWooh0hTzB/ozH/ey+7XWsNPeJ749z3NEKqkp0JvZEbG7bweiHjkbgQvM7AAzWwgcB9xZXxFFWmvFskX0l/qqek0tNeyk9+kv9bFi2aKqnrNh61hDUklSXBV73ZjZDcCpwGFm9jjwKeBUM1tMULnZAXwQwN3vNbMbgZ8Bu4E/U48b6TZRj5ZVG+9lYnJqxmPG7Bo97KthV9NDJtqe9fxKz4kaa6M8fpRKir9WxNyT/mxba3h42EdGRtpdDJFZygP3accPsn7L
2IwG0v5SH599x2sAZgTd+GN5gm4t3SiXrt7EWMLVxNBAP3esPD3vx5QuZWZb3H240vPy9KMX6VnLlwzNCrbDx8xLDMhLV29K7SFTKWDXWjNXY63koUAvUqWk4A/1Bd2sbpRZgX7+QH9ijX6OGRu2jil9I4DmuhGZoZ6GzbSeMA4VB1vVepJIazje466BVTJNgV4KqZaAXc8o1A1bx9j1292pjz+7a4oV374rdV9pJ4k5ZpmfYfmSIT77jtfQZzbrMQ2skogCvRROtQE7Oilcsm60plGo0fs9u2sq83lTezx1X1k186zPEDXg7knpVKFcvYACvRRQNdMGxE8KaSoFy7QpE6rZV1QzHxroxyBXDT1P2TWwSkCNsdLF0rojVpPvzhOkKwXLamrNUSomrc98dH/hypsrvlelspcPrJLepUAvXSmrO2JaT5SkgF0pSMeDZdqJJe39kkQplrGJSS5dN8ol60YZSgj6eT5DVtmT9im9S6kb6UpZ6Zk80wZEsmrrQwP904OdsvL+Se9X6jP6S/v+vebMzsRMj7BNyr/n+QxpZY8GSyUFeU2X0JsU6KUrZaVnyvPd8YBdLi2gfuH8xTOCZaV+7uXvt+aPTuS+T5/BjtVnsWP1WVQagF6ef8/zGao5oYHmtu9lSt1IV6qU2kgb1FQuz3wzULmfe6X3y5PeKX+PSvvMW3YIgvxHbrxrVu+c+MlKK1kVlwK9dKUVyxYlzitTS+NjnpNCWqAemFti6epNFYNjUnmT3qORZY8C99jEZOpkbBCcYDQ5WrEp0EtXqqY2W6+0wVClPuOF3+ye7j+fFRzj5U0KvI3uIVMeuLMyR/MH+muegiF6L10JdDYFeulaedMz9SgPmJGB/hLArGmMs4JjebDvM2OPe8N7yKSlaZJEJ5hL140mPl6pV5KuBLqDGmNFMqT1VT/ogP14rizIR9KCY/kApz3u04G2kUH+spu25QryfWbTDby1rmSlNW27gwK9SIasRthqg2MrgmLeUbr9pT6uOO/E6RNMtT14IpomuTsodSNdr5k54qzePXkbhOONokmqCYqVPmvWvqJ2gShVBMxoSD735CFuv3+8quNYzeA0aR8FeulqlXLE9Z4EsoJ5ngbhtBx/XN6gmCcfnhZ4+8xm1OCT9rV+y1ju1bAijez9JM2jQC9drVI6pN6GwkrBvFKDcCPno8nTMyYt8JYH8Hp62cS1sveT1C7P4uBXA38IPOXurw63rQHeBvwWeAh4n7tPmNkC4D4gSjpudvcPNaHcIkB2jriRwazWwNXI+Wjy5MMbNQAs6Uoobb+t6P0k9clTo78G+CJwbWzbbcBl7r7bzD4HXAZ8PHzsIXdf3NBSiqTIyhF3QkNhWvlqWbw7bz68ngFg8wf6E9M6K759FzhM7d03KZu6UXaPir1u3P1HwDNl225192gEyWbgyCaUTaSirN4iWUv7HXvZD1hQ58ReeSYIq7U3S5JW7SvpSmhqj08H+Yi6UXaPRuTo3w+si91faGZbgeeBT7r7/2nAe4gkqpSqSGsIjU8XXEvNNO9AoVpy2GkNyI3Mh2ftK23wVBJ1o+wO5jkGVoS59+9HOfrY9k8Aw8A73N3N7ADgJe7+tJmdDGwATnD35xP2eTFwMcDRRx998iOPPFLvZ5GCaUS3yUpdGyPVplKWrt7UsJRMXFIvnaTG1GZK+2xJ6v28Uh8z2+Luw5WeV/OAKTP7E4JG2nd5eLZw9xfd/enw9haChtpXJr3e3de6+7C7Dw8ODtZaDCmApBRI0pS6l64b5ZMbtlW17+VLhrhj5ekkTAc/Q7U102bl/zthpGna/Pqlskn11Y2ye9SUujGztwIfA/7A3XfFtg8Cz7j7HjN7OXAc8POGlFQKKS0FcmBpzqyA58D1mx9l+Jh5VdduK00TXO0An2YNFOqEBuS0tE7SNjXEdoc83StvAE4FDjOzx4FPEfSyOQC4zYJFjKNulG8ALjezKWAv8CF3fyZxxyKk12DT+p57
+JpqA8xpxw9y/eZHE2dx7C/1cdrxg7mmG440a6BQq0aaVkqLpfXcUWDvThUDvbtfmLD5qynPXQ+sr7dQ0jtqqalW+5oNW8dYv2UsMcgPDfRz2vGDrN8yVtXAqmYNFGrFSFPNONl7NDJW2ip1QY/+Es9NTiUG5zlmLFx5c+7gmjY6NWpIXLp6U00Dqxo5UChewz6kv8SBpTlM7JpqSoqkUjuA0jPFo0AvbZVWg1119gmMPPJMYrql2q6RlfLe7c6Ll9ewJyan6C/1ceX5i+sOskkpmrTPFR1P1fSLR9MUS1tlLYL9meWv4crzF08/1mez+87k6ZGSlt+OlgFM62DcqhkYm9XTJm0x8EPCRVOStLvHjzSHavTSdlkpkOixDVvHuKTGVZCSrhrKlwEs18qug63uqnlgaQ79pb5c89Y3ohzSfqrRS8dIm1IgqpmmmWOWOY1B0lXDQfvvN2tIfyR+VdEKta7uVElagJ7YNTV9PPLQ3PLdTzV66QhZPUEqTfW7xz1XL5n4YwtX3pz4PIOWj/RsR1fN6HgsXHlz5sLhUTm0AHh3U41eOkJWnjpP6qDaXHKzatG1yGqnqEeeSdCyPm9UDiAx11/rZHDSeqrRS0eotDZrnrlXqskld9rKSM2Y0z1PX/88C5XU2v1UOocCvXSEatdmTdtHXr2yMlKlE0ie49Du7qdSPwV66QjVrM06MLfEC7/ZPaMxtZbauFZGClQ6DloAvPvlmqa42YaHh31kZKTdxZAWqmapuryvV9Bujk6YOlmS5Z2mWIFeWq6ZgUMngObQce1MeQO9UjfSNGnBodKi3bUGFU3W1Xjl38WV5wfLQa+5ZTuXrhtV0O8SCvTSFFlBN6txr55gXekEItVJXCT8W3eBBWvIRtt0Mu186kcvTZEVdLP6sNcz74t6hzRW4iLhe306yEc0H07nU6CXpsgKukkDeQB+/eLu1P7yeYJ1Jw2Cara06SIaqZoTpE6mnU2BXpoiK+hGI0EPnTtzFsWJyanUtV3zBOs8I0GLIG1WykYH+2pOkEU8mRaJAr003IatY+z67e5Z2+NBd/mSIebuP7uJyGFWsM8brJs1lUCnadUC4omLhM8xSn1aJLzbqDFWGiqp6yQEK0atOvuEXCMunSBI19KVrxcGQbWqLUKLhBeHAr00VNpMkwcdsN+sYJA24jJa4k+StXKkqhYJL4ZcqRszu9rMnjKze2Lb5pnZbWb2QPj70HC7mdk/mNmDZna3mZ3UrMJL56mmttkrOfVG03GTauWt0V8DfBG4NrZtJfBDd19tZivD+x8HzgCOC3/+C/Cl8LcUSDSQZmxikj4z9rgzNNDPwNxS4qpNUW2zfADOuScPcfv940oDVKFXJmSTxsk9BYKZLQC+7+6vDu9vB051951mdgTwr+6+yMy+Et6+ofx5afvWFAjdJS0PD0FjXXxADeyb3gDQnCkiDZR3CoR6et0cHgveTwCHh7eHgMdiz3s83FZewIvNbMTMRsbHx+sohrRa1opPU3udg/bfL7HnS6t6i4jITA1pjHV3N7OqZkdz97XAWghq9I0oh7RGpd4dz01OMfqpt+R+nQbbiDRXPTX6J8OUDeHvp8LtY8BRsecdGW6TgqjUu6PaEaoabCPSXPUE+o3AReHti4Dvxra/N+x9cwrwXFZ+XrpP2hQGEAx2GpuYTByWr94iIu2RK3VjZjcApwKHmdnjwKeA1cCNZvYB4BHgvPDpPwDOBB4EdgHva3CZpc3ivT7ivW6MYLATJM9qqN4iIu2hhUekIZZcfmtit0oNfhJpnlb0uhEBgu6WSUEe1NAq0gk0BYJUJWn1p6zukWpoFWk/BXrJLW31p7Q+9UBLG1q1rqlIMgV6SZRWc08a8BQ1xpYb6C+1LNBqvViRdAr0Mku1Nfc97vSX+mZNbbDq7BNaUl7QerHtoCuo7qHGWJklq+aeJJrmoJ0LfmjUbWu1apUraQzV6GWWtOCYVnOPanJpgb0VNb9WztEu6ZWB
v/7evYmzmqq2316q0cssacGxlpp7q2p+GnXbWmmVgWd3TU2fcKN2G9X22081epllxbJFidMJV6q5J2lV7lyjblsr7QoqjdpL2kuBvgdVSqU0Mmi2MnfeC+vFdoqkykAlai9pHwX6HpPUo+bSdaNcsm6UoYF+Tjt+cMaKT1eev7iu4KnceTElVQZ+/eJuJiaTR0iDvvN2Uo6+xySlUuITkV23+dGG5tOVOy+u5UuGuGPl6Ty8+qyK36e+8/ZSoO8x1V4+17sC1PIlQ23veinNFV0lltfm54S9cfWdt59SNz2m2kY0qD+3qtx5saUtLXnEIZq5tFOoRt9jshYNSaPcqmSptsF9w9Yxlq7exMKVNycuUCONpxp9jylfNCS+WEgS5Valkmoa3DUnUXuoRt+Doka0HavP4srzF8/In7/7lKOVT5eqVNPgnjWuQppHNfoeF+XPo771129+dFa3Sk1eJVmqGXehOYnaQ4FeMi+nAV1qS0V5G9w1rqI9ak7dmNkiMxuN/TxvZpeY2SozG4ttP7ORBZbGy7qc1qW2NJLGVbRHzYHe3be7+2J3XwycDOwCvhM+fGX0mLv/oBEFlebJupxOe2xsYlK9JaQqUQowPuW12oFao1GNsW8EHnL3Rxq0P2mhtMvm+QP9mZfUmpFQ8orPYgr7prxWe09rNCrQXwDcELv/YTO728yuNrNDk15gZheb2YiZjYyPjzeoGFKLrMvprH73SuFIXkoBtlfdjbFmtj9wNnBZuOlLwKcJumd/GrgCeH/569x9LbAWYHh4OKsrtzRJvDfNIf0lDizNYWLXVGKviUvWjSbuQ70lJA/1tmmvRvS6OQP4qbs/CRD9BjCzq4DvN+A9pEZpXSPLe9pMTE7RX+pL7VaZtgB43t4S6qLZm6LvPa0mN8eMhStv1t9EkzUi0F9ILG1jZke4+87w7tuBexrwHlKDrG6TlRYEKX9tUpDP21tCoyF7U/n3nqR8FSrQ30Qz1BXozewg4M3AB2ObP29miwlSNzvKHpMmSaoxZwXzSpfSaRNV9Zmx172qGlirVpmSzpL2NwQkXiHqb6J56gr07v5r4GVl295TV4mkamk15rR/suhkkDVwJe1EsNedh1efVVX5lJ/tTVnfb9IVYqXXSO00100BpNWY00S18ayBK1ldLqvVyH1J98j6fqN+9NW8RmqnQF8A1dSC4n2XsxYEaeQIRo2G7E1Z32/Ujz5OfxPNo7luCqDSYiJpefWs+UkauUB4I/cl3WP5kiFWbbw3cR3ZoVg7kv4mms88JVfWSsPDwz4yMtLuYnSdqAG20rzyBjy8+ix1cZSWS+p501/q07QHDWJmW9x9uNLzVKPvUuX/QFmn6/kD/eriKG2hq7nOoBp9l1q6elPutV/nluYwuXsvSV/10IDW9RTpVqrRF1w1DbC7pvY2ZD8i0p0U6LtUpQbYavYj0m7l7UenHT/I7fePK93TIOpe2aWyZpXMS93ZpBPEpzB2gvaj6zY/OuO+psSuj2r0XaS81nPuyUN8/66ds7qvZfXAifSZqeeDdISsqRIimh6hPgr0HSbeZTKaD2QovJRdv2VsRq+Z9VvG+Ow7XgMw67I3/txy6t4mnSRvO5Hak2qnQN9B0maMHJuY5PrNj86qpUe1nDtWnj4raA8fM286+A/MLeEOz00mzzUv0k5525vUnlQ7BfoOknUJm5aKSavlZI16FekkK5YtqjidsdqT6qPG2A5Sy6WpajnS7ZLmXXr3KUenzsMk1VONvoNU22VStRwpCl2BNpdq9B2kmi6TquWISF6q0bdR0iRjn33Ha6Z73aQx0LQFIpKbavRtkjRIJJpk7I6Vp7Nj9VkMacEOKbgNW8dYunoTC1fezNLVmzQoqkkU6Nskax3ViBbskCJLq+wo2Dde3YHezHaY2TYzGzWzkXDbPDO7zcweCH8fWn9RiyXPOqrx3ggQjGaNTgb6Z5Bul6eyI43RqBr9ae6+ODZd5krgh+5+HPDD8L7E
5F1HdfmSoemafXwAlWo+0u3SKjtjE5NK5TRYs1I35wBfC29/DVjepPfpWtWkZVTzkSLKamtSKqexGhHoHbjVzLaY2cXhtsPdfWd4+wng8PIXmdnFZjZiZiPj4+MNKEZ3qbQ4d1yeNI9It8nTnXhyag+XrBtV7b5Ojehe+Tp3HzOz3wFuM7P74w+6u5vZrBH87r4WWAvBClMNKEdXqGXd1rSBVE6w0pTmrpFuVL7MYFYQ0NKX9WnoUoJmtgp4AfhT4FR332lmRwD/6u6pXUV6ZSnBtIWSzz15KHORhaTXxWk2SimCPMtjaunLmfIuJVhX6sbMDjKzg6PbwFuAe4CNwEXh0y4CvlvP+xRFWq79+gqLLJT3vimnfL0UQZ5UjtKVtak3R3848O9mdhdwJ3Czu/8zsBp4s5k9ALwpvN/TNmwdS62tJE0//JEb75oV7O9YeTqWsn/9A0i3q1ShAZhjph45NagrR+/uPwdOTNj+NPDGevbdjdLy71HqpRp73BNzkmn5eo2WlSKIJjdLS1eWdzGOXiPZNDK2QbJG+eVZKi1JUkpGo2WlF5T3Suuz2deySlnmp0nNGiSrr3s9aZXy15b3VNCKUVJU8amLF668OfE5Slnmo0DfIFl93audZz4uKSWjubul1yhlWR+lbnLIM8Ne1pQGeXoTlOYYpb6Zl6dKyYgElLKsjwJ9BXln2Mv6Q8yzVNqad57Imj86UcuniSSI/ocG+kvT2w4sKXzlpdRNBVm593gQrpQ7z5tuUWAXSffi7r3Tt5/dNaWeNzkp0FdQzTwzyp2LNE/eSpfMpmufCvJOJywizaXJ/WqnQF+BGoFEOoMqXbVT6qYC9VsXaa9o0OHYxCTGzClDVOnKR4E+B+XeRdqjfCoEh+lgP6RKV25K3TSZVrkXqV1SA6zDdDfLS7UoSS4K9E2kVe5F6pPW0DoxOTXj/+qSdaMsufxW/W+lUOqmCeI5xXLqDiaSXzXTh6hffToF+gaIT088MLfEC7/ZzdTe9JW71B1MJJ8VyxZlrq5WLlrLARTs4xTo61TVtMX8AAALZElEQVTeWPTsrqmKr1F3MJF8knq97frt7sz/s7S1HHqZAn2NstIzWdQdTKQ65b3eKq2hDEqRllOgr0GeP7QkBpx7srpqitQj+v9ZtfFeJibTa/ZKke6jXjdV2rB1jI/ceFdNK0Y5cPv9440vlEiPWb5kiNFPvYUvnL84cfUpUIo0ruZAb2ZHmdntZvYzM7vXzP483L7KzMbMbDT8ObNxxW2vqCYfrVuZpnxe+TjVMkQaZ/mSIa4478RZ05QYcNrxg+0pVAeqJ3WzG/iIu//UzA4GtpjZbeFjV7r739ZfvOZJW8g7S561Xw+dW+JTbzshNX+vWoZIYy1fMsTII89w/eZHp6dHcGD9ljGGj5kHaAqTmgO9u+8Edoa3f2Vm9wFdcfTKc+x5V5TPUxv/zVQwX3ZStzA1xIo0x+33j1N+nT05tYdVG+/lxd17q/5fL5qG5OjNbAGwBPhxuOnDZna3mV1tZoemvOZiMxsxs5Hx8dbmrbPmtc6SpzYeb+0vX1VKK0aJNEfWCNpa/teLpu5eN2b2EmA9cIm7P29mXwI+TXD19GngCuD95a9z97XAWoDh4eHspHeD1Tqvdd7BG9F+NBmaSGtUM4IWeq+trK4avZmVCIL89e5+E4C7P+nue9x9L3AV8Nr6i9lYtc5rXV5LV2u/SGdIWzfi0LmlxOf32v9oPb1uDPgqcJ+7/11s+xGxp70duKf24jVHPYuJLF8yxB0rT+fh1WcltvYrDy/SevFKGASVsOjKuzRnZoWsF/9H60ndLAXeA2wzs9Fw218CF5rZYoLUzQ7gg3WVsMGi3jaTU3voM2OP+6x5rZN65EB6y32vt+iLdILo/658SpJSnzHQX+K5yame/R81r9AnvBWGh4d9ZGSk6e+TNKK1NMd4yYH7MbEr+CM47fhB1m8Zm/UcDKb27DtW/aU+Na6KdJilqzcl
5ur7zLjivBML9/9qZlvcfbjS83pqZGxSb5upvc6zu6am57W+bvOjic+JB3nozZZ7kU6X1sgaTXTWq/PV91Sgb3RLe6+13It0uqxG1l6unPVUoG90S3uvtdyLdLqkjhZx5ZWzXlnqs6dmr6x2EYMsvdhyL9Lpohz8R268K3FOqqhytmHr2KzZL4s8aranAn15L5lD+kv8+re7Z+XfK9Hq8yKdK6n3DQQTnY1NTLLk8ltTV4Er6jz2PRXoIXkRg7Szfzn1tBHpDvFK3djEJAbTc+FUWgWuiG1vhQv01c5KmXX2/6/HzmPH05PqIy/ShaJKXVqXyzSNaHurZXbcZipUoK91VkoNfBIprmpq6I1oe6s1DjVToQZMpZ25B/pLHHTAfgriIj0ob40+Wkui3tiQ9n5DA/3csfL0uvZdricHTGVNVTo2MTk9KOqSdaMsufzWwnalEpF9KnW57DPjC+cvnl4wqN6ulrXOjttMhQr01eTWnt011dMj5UR6RTThWZq9YVbjspu2NaRCWOvsuM1UqEBf6cxdrpdHyon0kuVLhqZntiw3f6A/dZnQZ3dNcem6URZUUcuvZ3bcZilUoK905k5SxK5UIjJbWgA+7fjBzBx+1IoZNapWCvaduLpcoXrdQHCQ0xbmTqJpDER6Q1Lvumi22rzyDqjqtNXlChfoIThzX7pudNZiweXafTklIq1VHoCXrt5U9ZQo3ZgFKFTqJrJ8yVBmkO+UyykRaa9agvYcs67rxNH1Nfq0EWhDKYsFN6Mvq4h0p2oXFYd9c9tDUKnstFGwSbp6wNQnN2zj+s2Pzqq9Hzq3xFm/d8SslaI0V42IxCWtOtdf6uOA/ebMmNkySbQUaXweHWD6fismP8w7YKprA/2GrWOZefj+Uh/nnjzE7fePd/SZVkTaK22N6EZMaR5fqnRgbgn3YABndJJIW7c6r7yBvmmpGzN7K/D3QB/wT+6+upH7X3PL9sw8/OTUHm6/f1xpGhHJlNVDppoefEmipUph5qyZ0Wy50e9mz4fTlMZYM+sD/hE4A3gVcKGZvaqR75GnEaUbW8dFpDMsXzLEHStP5wvnL65qIGatmjmAs1m9bl4LPOjuP3f33wLfBM5p5Bvk6f+uPvIiUq9aBmLWqlmV02YF+iHgsdj9x8Nt08zsYjMbMbOR8fHxqt+g0nQH6iMvIo2SNYVCIzWrctq2fvTuvtbdh919eHBwsOrXlw8zPnRuiYH+kvrIi0hTpFUuB/pLvPuUoxMf65tjufffzMppsxpjx4CjYvePDLc1VKcNMxaR4qq0QNHwMfMSe+9E25rR6yavpnSvNLP9gP8HvJEgwP8E+GN3vzfp+Y1aeEREpJe0tXulu+82sw8DtxB0r7w6LciLiEhzNa0fvbv/APhBs/YvIiL5FHJSMxER2UeBXkSk4BToRUQKriMmNTOzceCRdpcj5jDgl+0uRBW6qbzdVFborvJ2U1mhu8rbqWU9xt0rDkTqiEDfacxsJE+XpU7RTeXtprJCd5W3m8oK3VXebiprEqVuREQKToFeRKTgFOiTrW13AarUTeXtprJCd5W3m8oK3VXebirrLMrRi4gUnGr0IiIFp0AvIlJwPR/ozewoM7vdzH5mZvea2Z+H21eZ2ZiZjYY/Z7a7rABmtsPMtoVlGgm3zTOz28zsgfD3oe0uJ4CZLYodv1Eze97MLumkY2tmV5vZU2Z2T2xb4vG0wD+Y2YNmdreZndQBZV1jZveH5fmOmQ2E2xeY2WTsGH+5A8qa+r2b2WXhcd1uZstaWdaM8q6LlXWHmY2G29t6bGvi7j39AxwBnBTePphgeuVXAauAj7a7fAnl3QEcVrbt88DK8PZK4HPtLmdCufuAJ4BjOunYAm8ATgLuqXQ8gTOB/w0YcArw4w4o61uA/cLbn4uVdUH8eR1yXBO/9/D/7S7gAGAh8BDQ1+7ylj1+BfBXnXBsa/np+Rq9u+9095+Gt38F3EfZsodd4Bzga+HtrwHL21iW
NG8EHnL3ThoBjbv/CHimbHPa8TwHuNYDm4EBMzuiNSVNLqu73+ruu8O7mwkW+Wm7lOOa5hzgm+7+ors/DDxIsO50y2SV18wMOA+4oZVlaqSeD/RxZrYAWAL8ONz04fCS+OpOSYcADtxqZlvM7OJw2+HuvjO8/QRweHuKlukCZv6jdOKxjaQdz4prIbfZ+wmuOCILzWyrmf2bmb2+XYUqk/S9d/pxfT3wpLs/ENvWicc2lQJ9yMxeAqwHLnH354EvAccCi4GdBJduneB17n4ScAbwZ2b2hviDHlxbdlSfWTPbHzgb+Fa4qVOP7SydeDyTmNkngN3A9eGmncDR7r4E+AvgG2b20naVL9Q133uZC5lZSenEY5tJgR4wsxJBkL/e3W8CcPcn3X2Pu+8FrqLFl5Jp3H0s/P0U8B2Ccj0ZpRDC30+1r4SJzgB+6u5PQuce25i049mStZCrZWZ/Avwh8K7wxESYBnk6vL2FIO/9yrYVkszvvSOPK0wvi/oOYF20rROPbSU9H+jD/NtXgfvc/e9i2+O517cD95S/ttXM7CAzOzi6TdAQdw+wEbgofNpFwHfbU8JUM2pEnXhsy6Qdz43Ae8PeN6cAz8VSPG1hZm8FPgac7e67YtsHzawvvP1y4Djg5+0p5XSZ0r73jcAFZnaAmS0kKOudrS5fijcB97v749GGTjy2FbW7NbjdP8DrCC7N7wZGw58zga8D28LtG4EjOqCsLyfonXAXcC/wiXD7y4AfAg8A/wLMa3dZY2U+CHgaOCS2rWOOLcEJaCcwRZAb/kDa8STobfOPBDW4bcBwB5T1QYL8dvS3++XwueeGfyOjwE+Bt3VAWVO/d+AT4XHdDpzRCX8H4fZrgA+VPbetx7aWH02BICJScD2fuhERKToFehGRglOgFxEpOAV6EZGCU6AXESk4BXoRkYJToBcRKbj/D1NedxTnBAiXAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f2420673710>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.title(\"text length vs # instances\")\n",
    "plt.scatter(list(text_len_counter.keys()), list(text_len_counter.values()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Check word frequencies"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "w_count_df = pd.DataFrame.from_dict(w_count, orient='index')\n",
    "w_count_df.sort_values(0, ascending=False, inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>the</th>\n",
       "      <td>18299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>of</th>\n",
       "      <td>8709</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>and</th>\n",
       "      <td>6402</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>to</th>\n",
       "      <td>6282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>in</th>\n",
       "      <td>4778</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>a</th>\n",
       "      <td>4279</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>was</th>\n",
       "      <td>3731</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>that</th>\n",
       "      <td>2888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>he</th>\n",
       "      <td>2711</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>his</th>\n",
       "      <td>2023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>for</th>\n",
       "      <td>1779</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>on</th>\n",
       "      <td>1768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>had</th>\n",
       "      <td>1628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>as</th>\n",
       "      <td>1589</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>with</th>\n",
       "      <td>1524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>by</th>\n",
       "      <td>1519</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>at</th>\n",
       "      <td>1463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>were</th>\n",
       "      <td>1435</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it</th>\n",
       "      <td>1362</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>which</th>\n",
       "      <td>1305</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>be</th>\n",
       "      <td>1135</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>from</th>\n",
       "      <td>1024</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>not</th>\n",
       "      <td>1014</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>this</th>\n",
       "      <td>992</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>is</th>\n",
       "      <td>937</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>or</th>\n",
       "      <td>932</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>but</th>\n",
       "      <td>874</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>one</th>\n",
       "      <td>782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>have</th>\n",
       "      <td>780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>oswald</th>\n",
       "      <td>776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>eighteen:</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lading</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sustain</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>inflict,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>markets,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>blow.</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ill-health</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>delirium</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>tremens,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>dejection,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>sacking</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prize-fighter</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>scandalized</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>outshone</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>ferdinand</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>grain.</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fluctuations</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>attempt\"</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>action;</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>grains,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>prices,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>protectionists</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>depress</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>market,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>election;</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>napoleon</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>french,</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>popularity</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>\"convulsive</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>lessening</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>22943 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    0\n",
       "the             18299\n",
       "of               8709\n",
       "and              6402\n",
       "to               6282\n",
       "in               4778\n",
       "a                4279\n",
       "was              3731\n",
       "that             2888\n",
       "he               2711\n",
       "his              2023\n",
       "for              1779\n",
       "on               1768\n",
       "had              1628\n",
       "as               1589\n",
       "with             1524\n",
       "by               1519\n",
       "at               1463\n",
       "were             1435\n",
       "it               1362\n",
       "which            1305\n",
       "be               1135\n",
       "from             1024\n",
       "not              1014\n",
       "this              992\n",
       "is                937\n",
       "or                932\n",
       "but               874\n",
       "one               782\n",
       "have              780\n",
       "oswald            776\n",
       "...               ...\n",
       "eighteen:           1\n",
       "lading              1\n",
       "sustain             1\n",
       "inflict,            1\n",
       "markets,            1\n",
       "blow.               1\n",
       "ill-health          1\n",
       "delirium            1\n",
       "tremens,            1\n",
       "dejection,          1\n",
       "sacking             1\n",
       "prize-fighter       1\n",
       "scandalized         1\n",
       "outshone            1\n",
       "ferdinand           1\n",
       "grain.              1\n",
       "fluctuations        1\n",
       "attempt\"            1\n",
       "action;             1\n",
       "grains,             1\n",
       "prices,             1\n",
       "protectionists      1\n",
       "depress             1\n",
       "market,             1\n",
       "election;           1\n",
       "napoleon            1\n",
       "french,             1\n",
       "popularity          1\n",
       "\"convulsive         1\n",
       "lessening           1\n",
       "\n",
       "[22943 rows x 1 columns]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w_count_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# check a certain word\n",
    "w_count_df.at['minute', 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fequency bar plot - it takes time!!\n",
    "w_count_df.plot.bar()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
